xref: /petsc/src/tao/pde_constrained/tutorials/parabolic.c (revision daa037dfd3c3bec8dc8659548d2b20b07c1dc6de)
1 #include <petsc/private/taoimpl.h>
2 
3 typedef struct {
4   PetscInt n; /*  Number of variables */
5   PetscInt m; /*  Number of constraints per time step */
6   PetscInt mx; /*  grid points in each direction */
7   PetscInt nt; /*  Number of time steps; as of now, must be divisible by 8 */
8   PetscInt ndata; /*  Number of data points per sample */
9   PetscInt ns; /*  Number of samples */
10   PetscInt *sample_times; /*  Times of samples */
11   IS       s_is;
12   IS       d_is;
13 
14   VecScatter state_scatter;
15   VecScatter design_scatter;
16   VecScatter *yi_scatter;
17   VecScatter *di_scatter;
18 
19   Mat       Js,Jd,JsBlockPrec,JsInv,JsBlock;
20   PetscBool jformed,dsg_formed;
21 
22   PetscReal alpha; /*  Regularization parameter */
23   PetscReal beta; /*  Weight attributed to ||u||^2 in regularization functional */
24   PetscReal noise; /*  Amount of noise to add to data */
25   PetscReal ht; /*  Time step */
26 
27   Mat Qblock,QblockT;
28   Mat L,LT;
29   Mat Div,Divwork;
30   Mat Grad;
31   Mat Av,Avwork,AvT;
32   Mat DSG;
33   Vec q;
34   Vec ur; /*  reference */
35 
36   Vec d;
37   Vec dwork;
38   Vec *di;
39 
40   Vec y; /*  state variables */
41   Vec ywork;
42 
43   Vec ytrue;
44   Vec *yi,*yiwork;
45 
46   Vec u; /*  design variables */
47   Vec uwork;
48 
49   Vec utrue;
50   Vec js_diag;
51   Vec c; /*  constraint vector */
52   Vec cwork;
53 
54   Vec lwork;
55   Vec S;
56   Vec Rwork,Swork,Twork;
57   Vec Av_u;
58 
59   KSP solver;
60   PC prec;
61 
62   PetscInt ksp_its;
63   PetscInt ksp_its_initial;
64 } AppCtx;
65 
66 PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
67 PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
68 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
69 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
70 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat, void*);
71 PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
72 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
73 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
74 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
75 PetscErrorCode ParabolicInitialize(AppCtx *user);
76 PetscErrorCode ParabolicDestroy(AppCtx *user);
77 PetscErrorCode ParabolicMonitor(Tao, void*);
78 
79 PetscErrorCode StateMatMult(Mat,Vec,Vec);
80 PetscErrorCode StateMatBlockMult(Mat,Vec,Vec);
81 PetscErrorCode StateMatMultTranspose(Mat,Vec,Vec);
82 PetscErrorCode StateMatGetDiagonal(Mat,Vec);
83 PetscErrorCode StateMatDuplicate(Mat,MatDuplicateOption,Mat*);
84 PetscErrorCode StateMatInvMult(Mat,Vec,Vec);
85 PetscErrorCode StateMatInvTransposeMult(Mat,Vec,Vec);
86 PetscErrorCode StateMatBlockPrecMult(PC,Vec,Vec);
87 
88 PetscErrorCode DesignMatMult(Mat,Vec,Vec);
89 PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);
90 
91 PetscErrorCode Gather_i(Vec,Vec*,VecScatter*,PetscInt);
92 PetscErrorCode Scatter_i(Vec,Vec*,VecScatter*,PetscInt);
93 
94 static  char help[]="";
95 
96 int main(int argc, char **argv)
97 {
98   PetscErrorCode     ierr;
99   Vec                x,x0;
100   Tao                tao;
101   AppCtx             user;
102   IS                 is_allstate,is_alldesign;
103   PetscInt           lo,hi,hi2,lo2,ksp_old;
104   PetscInt           ntests = 1;
105   PetscInt           i;
106 #if defined(PETSC_USE_LOG)
107   PetscLogStage      stages[1];
108 #endif
109 
110   PetscCall(PetscInitialize(&argc, &argv, (char*)0,help));
111   user.mx = 8;
112   ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"parabolic example",NULL);PetscCall(ierr);
113   PetscCall(PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL));
114   user.nt = 8;
115   PetscCall(PetscOptionsInt("-nt","Number of time steps","",user.nt,&user.nt,NULL));
116   user.ndata = 64;
117   PetscCall(PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL));
118   user.ns = 8;
119   PetscCall(PetscOptionsInt("-ns","Number of samples","",user.ns,&user.ns,NULL));
120   user.alpha = 1.0;
121   PetscCall(PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL));
122   user.beta = 0.01;
123   PetscCall(PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,NULL));
124   user.noise = 0.01;
125   PetscCall(PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,NULL));
126   PetscCall(PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL));
127   ierr = PetscOptionsEnd();PetscCall(ierr);
128 
129   user.m = user.mx*user.mx*user.mx; /*  number of constraints per time step */
130   user.n = user.m*(user.nt+1); /*  number of variables */
131   user.ht = (PetscReal)1/user.nt;
132 
133   PetscCall(VecCreate(PETSC_COMM_WORLD,&user.u));
134   PetscCall(VecCreate(PETSC_COMM_WORLD,&user.y));
135   PetscCall(VecCreate(PETSC_COMM_WORLD,&user.c));
136   PetscCall(VecSetSizes(user.u,PETSC_DECIDE,user.n-user.m*user.nt));
137   PetscCall(VecSetSizes(user.y,PETSC_DECIDE,user.m*user.nt));
138   PetscCall(VecSetSizes(user.c,PETSC_DECIDE,user.m*user.nt));
139   PetscCall(VecSetFromOptions(user.u));
140   PetscCall(VecSetFromOptions(user.y));
141   PetscCall(VecSetFromOptions(user.c));
142 
143   /* Create scatters for reduced spaces.
144      If the state vector y and design vector u are partitioned as
145      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
146      then the solution vector x is organized as
147      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
148      The index sets user.s_is and user.d_is correspond to the indices of the
149      state and design variables owned by the current processor.
150   */
151   PetscCall(VecCreate(PETSC_COMM_WORLD,&x));
152 
153   PetscCall(VecGetOwnershipRange(user.y,&lo,&hi));
154   PetscCall(VecGetOwnershipRange(user.u,&lo2,&hi2));
155 
156   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate));
157   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user.s_is));
158 
159   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign));
160   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user.d_is));
161 
162   PetscCall(VecSetSizes(x,hi-lo+hi2-lo2,user.n));
163   PetscCall(VecSetFromOptions(x));
164 
165   PetscCall(VecScatterCreate(x,user.s_is,user.y,is_allstate,&user.state_scatter));
166   PetscCall(VecScatterCreate(x,user.d_is,user.u,is_alldesign,&user.design_scatter));
167   PetscCall(ISDestroy(&is_alldesign));
168   PetscCall(ISDestroy(&is_allstate));
169 
170   /* Create TAO solver and set desired solution method */
171   PetscCall(TaoCreate(PETSC_COMM_WORLD,&tao));
172   PetscCall(TaoSetType(tao,TAOLCL));
173 
174   /* Set up initial vectors and matrices */
175   PetscCall(ParabolicInitialize(&user));
176 
177   PetscCall(Gather(x,user.y,user.state_scatter,user.u,user.design_scatter));
178   PetscCall(VecDuplicate(x,&x0));
179   PetscCall(VecCopy(x,x0));
180 
181   /* Set solution vector with an initial guess */
182   PetscCall(TaoSetSolution(tao,x));
183   PetscCall(TaoSetObjective(tao, FormFunction, &user));
184   PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user));
185   PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user));
186 
187   PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, user.JsBlockPrec, user.JsInv, FormJacobianState, &user));
188   PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user));
189 
190   PetscCall(TaoSetFromOptions(tao));
191   PetscCall(TaoSetStateDesignIS(tao,user.s_is,user.d_is));
192 
193  /* SOLVE THE APPLICATION */
194   PetscCall(PetscLogStageRegister("Trials",&stages[0]));
195   PetscCall(PetscLogStagePush(stages[0]));
196   user.ksp_its_initial = user.ksp_its;
197   ksp_old = user.ksp_its;
198   for (i=0; i<ntests; i++) {
199     PetscCall(TaoSolve(tao));
200     PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its-ksp_old));
201     PetscCall(VecCopy(x0,x));
202     PetscCall(TaoSetSolution(tao,x));
203   }
204   PetscCall(PetscLogStagePop());
205   PetscCall(PetscBarrier((PetscObject)x));
206   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: "));
207   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial));
208   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Total KSP iterations over %D trial(s): ",ntests));
209   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its));
210   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP iterations per trial: "));
211   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%D\n",(user.ksp_its-user.ksp_its_initial)/ntests));
212 
213   PetscCall(TaoDestroy(&tao));
214   PetscCall(VecDestroy(&x));
215   PetscCall(VecDestroy(&x0));
216   PetscCall(ParabolicDestroy(&user));
217 
218   PetscCall(PetscFinalize());
219   return 0;
220 }
221 /* ------------------------------------------------------------------- */
222 /*
223    dwork = Qy - d
224    lwork = L*(u-ur)
225    f = 1/2 * (dwork.dork + alpha*lwork.lwork)
226 */
227 PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
228 {
229   PetscReal      d1=0,d2=0;
230   PetscInt       i,j;
231   AppCtx         *user = (AppCtx*)ptr;
232 
233   PetscFunctionBegin;
234   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
235   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
236   for (j=0; j<user->ns; j++) {
237     i = user->sample_times[j];
238     PetscCall(MatMult(user->Qblock,user->yi[i],user->di[j]));
239   }
240   PetscCall(Gather_i(user->dwork,user->di,user->di_scatter,user->ns));
241   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
242   PetscCall(VecDot(user->dwork,user->dwork,&d1));
243 
244   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
245   PetscCall(MatMult(user->L,user->uwork,user->lwork));
246   PetscCall(VecDot(user->lwork,user->lwork,&d2));
247 
248   *f = 0.5 * (d1 + user->alpha*d2);
249   PetscFunctionReturn(0);
250 }
251 
252 /* ------------------------------------------------------------------- */
253 /*
254     state: g_s = Q' *(Qy - d)
255     design: g_d = alpha*L'*L*(u-ur)
256 */
257 PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
258 {
259   PetscInt       i,j;
260   AppCtx         *user = (AppCtx*)ptr;
261 
262   PetscFunctionBegin;
263   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
264   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
265   for (j=0; j<user->ns; j++) {
266     i = user->sample_times[j];
267     PetscCall(MatMult(user->Qblock,user->yi[i],user->di[j]));
268   }
269   PetscCall(Gather_i(user->dwork,user->di,user->di_scatter,user->ns));
270   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
271   PetscCall(Scatter_i(user->dwork,user->di,user->di_scatter,user->ns));
272   PetscCall(VecSet(user->ywork,0.0));
273   PetscCall(Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
274   for (j=0; j<user->ns; j++) {
275     i = user->sample_times[j];
276     PetscCall(MatMult(user->QblockT,user->di[j],user->yiwork[i]));
277   }
278   PetscCall(Gather_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
279 
280   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
281   PetscCall(MatMult(user->L,user->uwork,user->lwork));
282   PetscCall(MatMult(user->LT,user->lwork,user->uwork));
283   PetscCall(VecScale(user->uwork, user->alpha));
284   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
285   PetscFunctionReturn(0);
286 }
287 
288 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
289 {
290   PetscReal      d1,d2;
291   PetscInt       i,j;
292   AppCtx         *user = (AppCtx*)ptr;
293 
294   PetscFunctionBegin;
295   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
296   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
297   for (j=0; j<user->ns; j++) {
298     i = user->sample_times[j];
299     PetscCall(MatMult(user->Qblock,user->yi[i],user->di[j]));
300   }
301   PetscCall(Gather_i(user->dwork,user->di,user->di_scatter,user->ns));
302   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
303   PetscCall(VecDot(user->dwork,user->dwork,&d1));
304   PetscCall(Scatter_i(user->dwork,user->di,user->di_scatter,user->ns));
305   PetscCall(VecSet(user->ywork,0.0));
306   PetscCall(Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
307   for (j=0; j<user->ns; j++) {
308     i = user->sample_times[j];
309     PetscCall(MatMult(user->QblockT,user->di[j],user->yiwork[i]));
310   }
311   PetscCall(Gather_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
312 
313   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
314   PetscCall(MatMult(user->L,user->uwork,user->lwork));
315   PetscCall(VecDot(user->lwork,user->lwork,&d2));
316   PetscCall(MatMult(user->LT,user->lwork,user->uwork));
317   PetscCall(VecScale(user->uwork, user->alpha));
318   *f = 0.5 * (d1 + user->alpha*d2);
319 
320   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
321   PetscFunctionReturn(0);
322 }
323 
324 /* ------------------------------------------------------------------- */
325 /* A
326 MatShell object
327 */
328 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
329 {
330   AppCtx         *user = (AppCtx*)ptr;
331 
332   PetscFunctionBegin;
333   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
334   PetscCall(VecSet(user->uwork,0));
335   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
336   PetscCall(VecExp(user->uwork));
337   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
338   PetscCall(VecCopy(user->Av_u,user->Swork));
339   PetscCall(VecReciprocal(user->Swork));
340   PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
341   PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork));
342   if (user->dsg_formed) {
343     PetscCall(MatProductNumeric(user->DSG));
344   } else {
345     PetscCall(MatMatMult(user->Divwork,user->Grad,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&user->DSG));
346     user->dsg_formed = PETSC_TRUE;
347   }
348 
349   /* B = speye(nx^3) + ht*DSG; */
350   PetscCall(MatScale(user->DSG,user->ht));
351   PetscCall(MatShift(user->DSG,1.0));
352   PetscFunctionReturn(0);
353 }
354 
355 /* ------------------------------------------------------------------- */
356 /* B */
357 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
358 {
359   AppCtx         *user = (AppCtx*)ptr;
360 
361   PetscFunctionBegin;
362   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
363   PetscFunctionReturn(0);
364 }
365 
366 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
367 {
368   PetscInt       i;
369   AppCtx         *user;
370 
371   PetscFunctionBegin;
372   PetscCall(MatShellGetContext(J_shell,&user));
373   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
374   PetscCall(MatMult(user->JsBlock,user->yi[0],user->yiwork[0]));
375   for (i=1; i<user->nt; i++) {
376     PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
377     PetscCall(VecAXPY(user->yiwork[i],-1.0,user->yi[i-1]));
378   }
379   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
380   PetscFunctionReturn(0);
381 }
382 
383 PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y)
384 {
385   PetscInt       i;
386   AppCtx         *user;
387 
388   PetscFunctionBegin;
389   PetscCall(MatShellGetContext(J_shell,&user));
390   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
391   for (i=0; i<user->nt-1; i++) {
392     PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
393     PetscCall(VecAXPY(user->yiwork[i],-1.0,user->yi[i+1]));
394   }
395   i = user->nt-1;
396   PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
397   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
398   PetscFunctionReturn(0);
399 }
400 
401 PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y)
402 {
403   AppCtx         *user;
404 
405   PetscFunctionBegin;
406   PetscCall(MatShellGetContext(J_shell,&user));
407   PetscCall(MatMult(user->Grad,X,user->Swork));
408   PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u));
409   PetscCall(MatMult(user->Div,user->Swork,Y));
410   PetscCall(VecAYPX(Y,user->ht,X));
411   PetscFunctionReturn(0);
412 }
413 
414 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
415 {
416   PetscInt       i;
417   AppCtx         *user;
418 
419   PetscFunctionBegin;
420   PetscCall(MatShellGetContext(J_shell,&user));
421 
422   /* sdiag(1./v) */
423   PetscCall(VecSet(user->uwork,0));
424   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
425   PetscCall(VecExp(user->uwork));
426 
427   /* sdiag(1./((Av*(1./v)).^2)) */
428   PetscCall(MatMult(user->Av,user->uwork,user->Swork));
429   PetscCall(VecPointwiseMult(user->Swork,user->Swork,user->Swork));
430   PetscCall(VecReciprocal(user->Swork));
431 
432   /* (Av * (sdiag(1./v) * b)) */
433   PetscCall(VecPointwiseMult(user->uwork,user->uwork,X));
434   PetscCall(MatMult(user->Av,user->uwork,user->Twork));
435 
436   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
437   PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork));
438 
439   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
440   for (i=0; i<user->nt; i++) {
441     /* (sdiag(Grad*y(:,i)) */
442     PetscCall(MatMult(user->Grad,user->yi[i],user->Twork));
443 
444     /* ht * Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
445     PetscCall(VecPointwiseMult(user->Twork,user->Twork,user->Swork));
446     PetscCall(MatMult(user->Div,user->Twork,user->yiwork[i]));
447     PetscCall(VecScale(user->yiwork[i],user->ht));
448   }
449   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
450 
451   PetscFunctionReturn(0);
452 }
453 
454 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
455 {
456   PetscInt       i;
457   AppCtx         *user;
458 
459   PetscFunctionBegin;
460   PetscCall(MatShellGetContext(J_shell,&user));
461 
462   /* sdiag(1./((Av*(1./v)).^2)) */
463   PetscCall(VecSet(user->uwork,0));
464   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
465   PetscCall(VecExp(user->uwork));
466   PetscCall(MatMult(user->Av,user->uwork,user->Rwork));
467   PetscCall(VecPointwiseMult(user->Rwork,user->Rwork,user->Rwork));
468   PetscCall(VecReciprocal(user->Rwork));
469 
470   PetscCall(VecSet(Y,0.0));
471   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
472   PetscCall(Scatter_i(X,user->yiwork,user->yi_scatter,user->nt));
473   for (i=0; i<user->nt; i++) {
474     /* (Div' * b(:,i)) */
475     PetscCall(MatMult(user->Grad,user->yiwork[i],user->Swork));
476 
477     /* sdiag(Grad*y(:,i)) */
478     PetscCall(MatMult(user->Grad,user->yi[i],user->Twork));
479 
480     /* (sdiag(Grad*y(:,i)) * (Div' * b(:,i))) */
481     PetscCall(VecPointwiseMult(user->Twork,user->Swork,user->Twork));
482 
483     /* (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i)))) */
484     PetscCall(VecPointwiseMult(user->Twork,user->Rwork,user->Twork));
485 
486     /* (Av' * (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i))))) */
487     PetscCall(MatMult(user->AvT,user->Twork,user->yiwork[i]));
488 
489     /* sdiag(1./v) * (Av' * (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i))))) */
490     PetscCall(VecPointwiseMult(user->yiwork[i],user->uwork,user->yiwork[i]));
491     PetscCall(VecAXPY(Y,user->ht,user->yiwork[i]));
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y)
497 {
498   AppCtx         *user;
499 
500   PetscFunctionBegin;
501   PetscCall(PCShellGetContext(PC_shell,&user));
502 
503   if (user->dsg_formed) {
504     PetscCall(MatSOR(user->DSG,X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y));
505   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"DSG not formed");
506   PetscFunctionReturn(0);
507 }
508 
509 PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y)
510 {
511   AppCtx         *user;
512   PetscInt       its,i;
513 
514   PetscFunctionBegin;
515   PetscCall(MatShellGetContext(J_shell,&user));
516 
517   if (Y == user->ytrue) {
518     /* First solve is done with true solution to set up problem */
519     PetscCall(KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
520   } else {
521     PetscCall(KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
522   }
523 
524   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
525   PetscCall(KSPSolve(user->solver,user->yi[0],user->yiwork[0]));
526   PetscCall(KSPGetIterationNumber(user->solver,&its));
527   user->ksp_its = user->ksp_its + its;
528 
529   for (i=1; i<user->nt; i++) {
530     PetscCall(VecAXPY(user->yi[i],1.0,user->yiwork[i-1]));
531     PetscCall(KSPSolve(user->solver,user->yi[i],user->yiwork[i]));
532     PetscCall(KSPGetIterationNumber(user->solver,&its));
533     user->ksp_its = user->ksp_its + its;
534   }
535   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
536   PetscFunctionReturn(0);
537 }
538 
539 PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y)
540 {
541   AppCtx         *user;
542   PetscInt       its,i;
543 
544   PetscFunctionBegin;
545   PetscCall(MatShellGetContext(J_shell,&user));
546 
547   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
548 
549   i = user->nt - 1;
550   PetscCall(KSPSolve(user->solver,user->yi[i],user->yiwork[i]));
551 
552   PetscCall(KSPGetIterationNumber(user->solver,&its));
553   user->ksp_its = user->ksp_its + its;
554 
555   for (i=user->nt-2; i>=0; i--) {
556     PetscCall(VecAXPY(user->yi[i],1.0,user->yiwork[i+1]));
557     PetscCall(KSPSolve(user->solver,user->yi[i],user->yiwork[i]));
558 
559     PetscCall(KSPGetIterationNumber(user->solver,&its));
560     user->ksp_its = user->ksp_its + its;
561 
562   }
563 
564   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
565   PetscFunctionReturn(0);
566 }
567 
568 PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell)
569 {
570   AppCtx         *user;
571 
572   PetscFunctionBegin;
573   PetscCall(MatShellGetContext(J_shell,&user));
574 
575   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,new_shell));
576   PetscCall(MatShellSetOperation(*new_shell,MATOP_MULT,(void(*)(void))StateMatMult));
577   PetscCall(MatShellSetOperation(*new_shell,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate));
578   PetscCall(MatShellSetOperation(*new_shell,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose));
579   PetscCall(MatShellSetOperation(*new_shell,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal));
580   PetscFunctionReturn(0);
581 }
582 
583 PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X)
584 {
585   AppCtx         *user;
586 
587   PetscFunctionBegin;
588   PetscCall(MatShellGetContext(J_shell,&user));
589   PetscCall(VecCopy(user->js_diag,X));
590   PetscFunctionReturn(0);
591 
592 }
593 
594 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
595 {
596   /* con = Ay - q, A = [B  0  0 ... 0;
597                        -I  B  0 ... 0;
598                         0 -I  B ... 0;
599                              ...     ;
600                         0    ... -I B]
601      B = ht * Div * Sigma * Grad + eye */
602   PetscInt       i;
603   AppCtx         *user = (AppCtx*)ptr;
604 
605   PetscFunctionBegin;
606   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
607   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
608   PetscCall(MatMult(user->JsBlock,user->yi[0],user->yiwork[0]));
609   for (i=1; i<user->nt; i++) {
610     PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
611     PetscCall(VecAXPY(user->yiwork[i],-1.0,user->yi[i-1]));
612   }
613   PetscCall(Gather_i(C,user->yiwork,user->yi_scatter,user->nt));
614   PetscCall(VecAXPY(C,-1.0,user->q));
615   PetscFunctionReturn(0);
616 }
617 
618 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
619 {
620   PetscFunctionBegin;
621   PetscCall(VecScatterBegin(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD));
622   PetscCall(VecScatterEnd(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD));
623   PetscCall(VecScatterBegin(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD));
624   PetscCall(VecScatterEnd(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD));
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode Scatter_i(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
629 {
630   PetscInt       i;
631 
632   PetscFunctionBegin;
633   for (i=0; i<nt; i++) {
634     PetscCall(VecScatterBegin(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD));
635     PetscCall(VecScatterEnd(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD));
636   }
637   PetscFunctionReturn(0);
638 }
639 
640 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
641 {
642   PetscFunctionBegin;
643   PetscCall(VecScatterBegin(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE));
644   PetscCall(VecScatterEnd(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE));
645   PetscCall(VecScatterBegin(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE));
646   PetscCall(VecScatterEnd(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE));
647   PetscFunctionReturn(0);
648 }
649 
650 PetscErrorCode Gather_i(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
651 {
652   PetscInt       i;
653 
654   PetscFunctionBegin;
655   for (i=0; i<nt; i++) {
656     PetscCall(VecScatterBegin(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE));
657     PetscCall(VecScatterEnd(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE));
658   }
659   PetscFunctionReturn(0);
660 }
661 
662 PetscErrorCode ParabolicInitialize(AppCtx *user)
663 {
664   PetscInt       m,n,i,j,k,linear_index,istart,iend,iblock,lo,hi,lo2,hi2;
665   Vec            XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork,yi,di,bc;
666   PetscReal      *x, *y, *z;
667   PetscReal      h,stime;
668   PetscScalar    hinv,neg_hinv,half = 0.5,sqrt_beta;
669   PetscInt       im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
670   PetscReal      xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
671   PetscScalar    v,vx,vy,vz;
672   IS             is_from_y,is_to_yi,is_from_d,is_to_di;
673   /* Data locations */
674   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,
675                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,
676                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,
677                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,
678                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,
679                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,
680                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,
681                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};
682 
683   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,
684                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,
685                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,
686                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,
687                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,
688                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,
689                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,
690                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};
691 
692   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,
693                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,
694                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,
695                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,
696                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,
697                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,
698                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,
699                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};
700 
701   PetscFunctionBegin;
702   PetscCall(PetscMalloc1(user->mx,&x));
703   PetscCall(PetscMalloc1(user->mx,&y));
704   PetscCall(PetscMalloc1(user->mx,&z));
705   user->jformed = PETSC_FALSE;
706   user->dsg_formed = PETSC_FALSE;
707 
708   n = user->mx * user->mx * user->mx;
709   m = 3 * user->mx * user->mx * (user->mx-1);
710   sqrt_beta = PetscSqrtScalar(user->beta);
711 
712   user->ksp_its = 0;
713   user->ksp_its_initial = 0;
714 
715   stime = (PetscReal)user->nt/user->ns;
716   PetscCall(PetscMalloc1(user->ns,&user->sample_times));
717   for (i=0; i<user->ns; i++) {
718     user->sample_times[i] = (PetscInt)(stime*((PetscReal)i+1.0)-0.5);
719   }
720 
721   PetscCall(VecCreate(PETSC_COMM_WORLD,&XX));
722   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->q));
723   PetscCall(VecSetSizes(XX,PETSC_DECIDE,n));
724   PetscCall(VecSetSizes(user->q,PETSC_DECIDE,n*user->nt));
725   PetscCall(VecSetFromOptions(XX));
726   PetscCall(VecSetFromOptions(user->q));
727 
728   PetscCall(VecDuplicate(XX,&YY));
729   PetscCall(VecDuplicate(XX,&ZZ));
730   PetscCall(VecDuplicate(XX,&XXwork));
731   PetscCall(VecDuplicate(XX,&YYwork));
732   PetscCall(VecDuplicate(XX,&ZZwork));
733   PetscCall(VecDuplicate(XX,&UTwork));
734   PetscCall(VecDuplicate(XX,&user->utrue));
735   PetscCall(VecDuplicate(XX,&bc));
736 
737   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
738   h = 1.0/user->mx;
739   hinv = user->mx;
740   neg_hinv = -hinv;
741 
742   PetscCall(VecGetOwnershipRange(XX,&istart,&iend));
743   for (linear_index=istart; linear_index<iend; linear_index++) {
744     i = linear_index % user->mx;
745     j = ((linear_index-i)/user->mx) % user->mx;
746     k = ((linear_index-i)/user->mx-j) / user->mx;
747     vx = h*(i+0.5);
748     vy = h*(j+0.5);
749     vz = h*(k+0.5);
750     PetscCall(VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES));
751     PetscCall(VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES));
752     PetscCall(VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES));
753     if ((vx<0.6) && (vx>0.4) && (vy<0.6) && (vy>0.4) && (vy<0.6) && (vz<0.6) && (vz>0.4)) {
754       v = 1000.0;
755       PetscCall(VecSetValues(bc,1,&linear_index,&v,INSERT_VALUES));
756     }
757   }
758 
759   PetscCall(VecAssemblyBegin(XX));
760   PetscCall(VecAssemblyEnd(XX));
761   PetscCall(VecAssemblyBegin(YY));
762   PetscCall(VecAssemblyEnd(YY));
763   PetscCall(VecAssemblyBegin(ZZ));
764   PetscCall(VecAssemblyEnd(ZZ));
765   PetscCall(VecAssemblyBegin(bc));
766   PetscCall(VecAssemblyEnd(bc));
767 
768   /* Compute true parameter function
769      ut = 0.5 + exp(-10*((x-0.5)^2+(y-0.5)^2+(z-0.5)^2)) */
770   PetscCall(VecCopy(XX,XXwork));
771   PetscCall(VecCopy(YY,YYwork));
772   PetscCall(VecCopy(ZZ,ZZwork));
773 
774   PetscCall(VecShift(XXwork,-0.5));
775   PetscCall(VecShift(YYwork,-0.5));
776   PetscCall(VecShift(ZZwork,-0.5));
777 
778   PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork));
779   PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork));
780   PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork));
781 
782   PetscCall(VecCopy(XXwork,user->utrue));
783   PetscCall(VecAXPY(user->utrue,1.0,YYwork));
784   PetscCall(VecAXPY(user->utrue,1.0,ZZwork));
785   PetscCall(VecScale(user->utrue,-10.0));
786   PetscCall(VecExp(user->utrue));
787   PetscCall(VecShift(user->utrue,0.5));
788 
789   PetscCall(VecDestroy(&XX));
790   PetscCall(VecDestroy(&YY));
791   PetscCall(VecDestroy(&ZZ));
792   PetscCall(VecDestroy(&XXwork));
793   PetscCall(VecDestroy(&YYwork));
794   PetscCall(VecDestroy(&ZZwork));
795   PetscCall(VecDestroy(&UTwork));
796 
797   /* Initial guess and reference model */
798   PetscCall(VecDuplicate(user->utrue,&user->ur));
799   PetscCall(VecSet(user->ur,0.0));
800 
801   /* Generate Grad matrix */
802   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Grad));
803   PetscCall(MatSetSizes(user->Grad,PETSC_DECIDE,PETSC_DECIDE,m,n));
804   PetscCall(MatSetFromOptions(user->Grad));
805   PetscCall(MatMPIAIJSetPreallocation(user->Grad,2,NULL,2,NULL));
806   PetscCall(MatSeqAIJSetPreallocation(user->Grad,2,NULL));
807   PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend));
808 
809   for (i=istart; i<iend; i++) {
810     if (i<m/3) {
811       iblock = i / (user->mx-1);
812       j = iblock*user->mx + (i % (user->mx-1));
813       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
814       j = j+1;
815       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
816     }
817     if (i>=m/3 && i<2*m/3) {
818       iblock = (i-m/3) / (user->mx*(user->mx-1));
819       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
820       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
821       j = j + user->mx;
822       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
823     }
824     if (i>=2*m/3) {
825       j = i-2*m/3;
826       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
827       j = j + user->mx*user->mx;
828       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
829     }
830   }
831 
832   PetscCall(MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY));
833   PetscCall(MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY));
834 
835   /* Generate arithmetic averaging matrix Av */
836   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Av));
837   PetscCall(MatSetSizes(user->Av,PETSC_DECIDE,PETSC_DECIDE,m,n));
838   PetscCall(MatSetFromOptions(user->Av));
839   PetscCall(MatMPIAIJSetPreallocation(user->Av,2,NULL,2,NULL));
840   PetscCall(MatSeqAIJSetPreallocation(user->Av,2,NULL));
841   PetscCall(MatGetOwnershipRange(user->Av,&istart,&iend));
842 
843   for (i=istart; i<iend; i++) {
844     if (i<m/3) {
845       iblock = i / (user->mx-1);
846       j = iblock*user->mx + (i % (user->mx-1));
847       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
848       j = j+1;
849       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
850     }
851     if (i>=m/3 && i<2*m/3) {
852       iblock = (i-m/3) / (user->mx*(user->mx-1));
853       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
854       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
855       j = j + user->mx;
856       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
857     }
858     if (i>=2*m/3) {
859       j = i-2*m/3;
860       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
861       j = j + user->mx*user->mx;
862       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
863     }
864   }
865 
866   PetscCall(MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY));
867   PetscCall(MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY));
868 
869   /* Generate transpose of averaging matrix Av */
870   PetscCall(MatTranspose(user->Av,MAT_INITIAL_MATRIX,&user->AvT));
871 
872   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->L));
873   PetscCall(MatSetSizes(user->L,PETSC_DECIDE,PETSC_DECIDE,m+n,n));
874   PetscCall(MatSetFromOptions(user->L));
875   PetscCall(MatMPIAIJSetPreallocation(user->L,2,NULL,2,NULL));
876   PetscCall(MatSeqAIJSetPreallocation(user->L,2,NULL));
877   PetscCall(MatGetOwnershipRange(user->L,&istart,&iend));
878 
879   for (i=istart; i<iend; i++) {
880     if (i<m/3) {
881       iblock = i / (user->mx-1);
882       j = iblock*user->mx + (i % (user->mx-1));
883       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
884       j = j+1;
885       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
886     }
887     if (i>=m/3 && i<2*m/3) {
888       iblock = (i-m/3) / (user->mx*(user->mx-1));
889       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
890       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
891       j = j + user->mx;
892       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
893     }
894     if (i>=2*m/3 && i<m) {
895       j = i-2*m/3;
896       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
897       j = j + user->mx*user->mx;
898       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
899     }
900     if (i>=m) {
901       j = i - m;
902       PetscCall(MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES));
903     }
904   }
905 
906   PetscCall(MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY));
907   PetscCall(MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY));
908 
909   PetscCall(MatScale(user->L,PetscPowScalar(h,1.5)));
910 
911   /* Generate Div matrix */
912   PetscCall(MatTranspose(user->Grad,MAT_INITIAL_MATRIX,&user->Div));
913 
914   /* Build work vectors and matrices */
915   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->S));
916   PetscCall(VecSetSizes(user->S, PETSC_DECIDE, user->mx*user->mx*(user->mx-1)*3));
917   PetscCall(VecSetFromOptions(user->S));
918 
919   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->lwork));
920   PetscCall(VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx));
921   PetscCall(VecSetFromOptions(user->lwork));
922 
923   PetscCall(MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork));
924   PetscCall(MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork));
925 
926   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->d));
927   PetscCall(VecSetSizes(user->d,PETSC_DECIDE,user->ndata*user->nt));
928   PetscCall(VecSetFromOptions(user->d));
929 
930   PetscCall(VecDuplicate(user->S,&user->Swork));
931   PetscCall(VecDuplicate(user->S,&user->Av_u));
932   PetscCall(VecDuplicate(user->S,&user->Twork));
933   PetscCall(VecDuplicate(user->S,&user->Rwork));
934   PetscCall(VecDuplicate(user->y,&user->ywork));
935   PetscCall(VecDuplicate(user->u,&user->uwork));
936   PetscCall(VecDuplicate(user->u,&user->js_diag));
937   PetscCall(VecDuplicate(user->c,&user->cwork));
938   PetscCall(VecDuplicate(user->d,&user->dwork));
939 
940   /* Create matrix-free shell user->Js for computing A*x */
941   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m*user->nt,user,&user->Js));
942   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult));
943   PetscCall(MatShellSetOperation(user->Js,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate));
944   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose));
945   PetscCall(MatShellSetOperation(user->Js,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal));
946 
947   /* Diagonal blocks of user->Js */
948   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsBlock));
949   PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateMatBlockMult));
950   /* Blocks are symmetric */
951   PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockMult));
952 
953   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
954      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
955      This is an SSOR preconditioner for user->JsBlock. */
956   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsBlockPrec));
957   PetscCall(MatShellSetOperation(user->JsBlockPrec,MATOP_MULT,(void(*)(void))StateMatBlockPrecMult));
958   /* JsBlockPrec is symmetric */
959   PetscCall(MatShellSetOperation(user->JsBlockPrec,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockPrecMult));
960   PetscCall(MatSetOption(user->JsBlockPrec,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
961 
962   /* Create a matrix-free shell user->Jd for computing B*x */
963   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m,user,&user->Jd));
964   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult));
965   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose));
966 
967   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
968   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m*user->nt,user,&user->JsInv));
969   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateMatInvMult));
970   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatInvTransposeMult));
971 
972   /* Solver options and tolerances */
973   PetscCall(KSPCreate(PETSC_COMM_WORLD,&user->solver));
974   PetscCall(KSPSetType(user->solver,KSPCG));
975   PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->JsBlockPrec));
976   PetscCall(KSPSetInitialGuessNonzero(user->solver,PETSC_FALSE));
977   PetscCall(KSPSetTolerances(user->solver,1e-4,1e-20,1e3,500));
978   PetscCall(KSPSetFromOptions(user->solver));
979   PetscCall(KSPGetPC(user->solver,&user->prec));
980   PetscCall(PCSetType(user->prec,PCSHELL));
981 
982   PetscCall(PCShellSetApply(user->prec,StateMatBlockPrecMult));
983   PetscCall(PCShellSetApplyTranspose(user->prec,StateMatBlockPrecMult));
984   PetscCall(PCShellSetContext(user->prec,user));
985 
986   /* Create scatter from y to y_1,y_2,...,y_nt */
987   PetscCall(PetscMalloc1(user->nt*user->m,&user->yi_scatter));
988   PetscCall(VecCreate(PETSC_COMM_WORLD,&yi));
989   PetscCall(VecSetSizes(yi,PETSC_DECIDE,user->mx*user->mx*user->mx));
990   PetscCall(VecSetFromOptions(yi));
991   PetscCall(VecDuplicateVecs(yi,user->nt,&user->yi));
992   PetscCall(VecDuplicateVecs(yi,user->nt,&user->yiwork));
993 
994   PetscCall(VecGetOwnershipRange(user->y,&lo2,&hi2));
995   istart = 0;
996   for (i=0; i<user->nt; i++) {
997     PetscCall(VecGetOwnershipRange(user->yi[i],&lo,&hi));
998     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_yi));
999     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y));
1000     PetscCall(VecScatterCreate(user->y,is_from_y,user->yi[i],is_to_yi,&user->yi_scatter[i]));
1001     istart = istart + hi-lo;
1002     PetscCall(ISDestroy(&is_to_yi));
1003     PetscCall(ISDestroy(&is_from_y));
1004   }
1005   PetscCall(VecDestroy(&yi));
1006 
1007   /* Create scatter from d to d_1,d_2,...,d_ns */
1008   PetscCall(PetscMalloc1(user->ns*user->ndata,&user->di_scatter));
1009   PetscCall(VecCreate(PETSC_COMM_WORLD,&di));
1010   PetscCall(VecSetSizes(di,PETSC_DECIDE,user->ndata));
1011   PetscCall(VecSetFromOptions(di));
1012   PetscCall(VecDuplicateVecs(di,user->ns,&user->di));
1013   PetscCall(VecGetOwnershipRange(user->d,&lo2,&hi2));
1014   istart = 0;
1015   for (i=0; i<user->ns; i++) {
1016     PetscCall(VecGetOwnershipRange(user->di[i],&lo,&hi));
1017     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_di));
1018     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d));
1019     PetscCall(VecScatterCreate(user->d,is_from_d,user->di[i],is_to_di,&user->di_scatter[i]));
1020     istart = istart + hi-lo;
1021     PetscCall(ISDestroy(&is_to_di));
1022     PetscCall(ISDestroy(&is_from_d));
1023   }
1024   PetscCall(VecDestroy(&di));
1025 
1026   /* Assemble RHS of forward problem */
1027   PetscCall(VecCopy(bc,user->yiwork[0]));
1028   for (i=1; i<user->nt; i++) {
1029     PetscCall(VecSet(user->yiwork[i],0.0));
1030   }
1031   PetscCall(Gather_i(user->q,user->yiwork,user->yi_scatter,user->nt));
1032   PetscCall(VecDestroy(&bc));
1033 
1034   /* Compute true state function yt given ut */
1035   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->ytrue));
1036   PetscCall(VecSetSizes(user->ytrue,PETSC_DECIDE,n*user->nt));
1037   PetscCall(VecSetFromOptions(user->ytrue));
1038 
1039   /* First compute Av_u = Av*exp(-u) */
1040   PetscCall(VecSet(user->uwork,0));
1041   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue)); /*  Note: user->utrue */
1042   PetscCall(VecExp(user->uwork));
1043   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
1044 
1045   /* Symbolic DSG = Div * Grad */
1046   PetscCall(MatProductCreate(user->Div,user->Grad,NULL,&user->DSG));
1047   PetscCall(MatProductSetType(user->DSG,MATPRODUCT_AB));
1048   PetscCall(MatProductSetAlgorithm(user->DSG,"default"));
1049   PetscCall(MatProductSetFill(user->DSG,PETSC_DEFAULT));
1050   PetscCall(MatProductSetFromOptions(user->DSG));
1051   PetscCall(MatProductSymbolic(user->DSG));
1052 
1053   user->dsg_formed = PETSC_TRUE;
1054 
1055   /* Next form DSG = Div*Grad */
1056   PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1057   PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Av_u));
1058   if (user->dsg_formed) {
1059     PetscCall(MatProductNumeric(user->DSG));
1060   } else {
1061     PetscCall(MatMatMult(user->Div,user->Grad,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&user->DSG));
1062     user->dsg_formed = PETSC_TRUE;
1063   }
1064   /* B = speye(nx^3) + ht*DSG; */
1065   PetscCall(MatScale(user->DSG,user->ht));
1066   PetscCall(MatShift(user->DSG,1.0));
1067 
1068   /* Now solve for ytrue */
1069   PetscCall(StateMatInvMult(user->Js,user->q,user->ytrue));
1070 
1071   /* Initial guess y0 for state given u0 */
1072 
1073   /* First compute Av_u = Av*exp(-u) */
1074   PetscCall(VecSet(user->uwork,0));
1075   PetscCall(VecAXPY(user->uwork,-1.0,user->u)); /*  Note: user->u */
1076   PetscCall(VecExp(user->uwork));
1077   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
1078 
1079   /* Next form DSG = Div*S*Grad */
1080   PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1081   PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Av_u));
1082   if (user->dsg_formed) {
1083     PetscCall(MatProductNumeric(user->DSG));
1084   } else {
1085     PetscCall(MatMatMult(user->Div,user->Grad,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&user->DSG));
1086 
1087     user->dsg_formed = PETSC_TRUE;
1088   }
1089   /* B = speye(nx^3) + ht*DSG; */
1090   PetscCall(MatScale(user->DSG,user->ht));
1091   PetscCall(MatShift(user->DSG,1.0));
1092 
1093   /* Now solve for y */
1094   PetscCall(StateMatInvMult(user->Js,user->q,user->y));
1095 
1096   /* Construct projection matrix Q, a block diagonal matrix consisting of nt copies of Qblock along the diagonal */
1097   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Qblock));
1098   PetscCall(MatSetSizes(user->Qblock,PETSC_DECIDE,PETSC_DECIDE,user->ndata,n));
1099   PetscCall(MatSetFromOptions(user->Qblock));
1100   PetscCall(MatMPIAIJSetPreallocation(user->Qblock,8,NULL,8,NULL));
1101   PetscCall(MatSeqAIJSetPreallocation(user->Qblock,8,NULL));
1102 
1103   for (i=0; i<user->mx; i++) {
1104     x[i] = h*(i+0.5);
1105     y[i] = h*(i+0.5);
1106     z[i] = h*(i+0.5);
1107   }
1108 
1109   PetscCall(MatGetOwnershipRange(user->Qblock,&istart,&iend));
1110   nx = user->mx; ny = user->mx; nz = user->mx;
1111   for (i=istart; i<iend; i++) {
1112     xri = xr[i];
1113     im = 0;
1114     xim = x[im];
1115     while (xri>xim && im<nx) {
1116       im = im+1;
1117       xim = x[im];
1118     }
1119     indx1 = im-1;
1120     indx2 = im;
1121     dx1 = xri - x[indx1];
1122     dx2 = x[indx2] - xri;
1123 
1124     yri = yr[i];
1125     im = 0;
1126     yim = y[im];
1127     while (yri>yim && im<ny) {
1128       im = im+1;
1129       yim = y[im];
1130     }
1131     indy1 = im-1;
1132     indy2 = im;
1133     dy1 = yri - y[indy1];
1134     dy2 = y[indy2] - yri;
1135 
1136     zri = zr[i];
1137     im = 0;
1138     zim = z[im];
1139     while (zri>zim && im<nz) {
1140       im = im+1;
1141       zim = z[im];
1142     }
1143     indz1 = im-1;
1144     indz2 = im;
1145     dz1 = zri - z[indz1];
1146     dz2 = z[indz2] - zri;
1147 
1148     Dx = x[indx2] - x[indx1];
1149     Dy = y[indy2] - y[indy1];
1150     Dz = z[indz2] - z[indz1];
1151 
1152     j = indx1 + indy1*nx + indz1*nx*ny;
1153     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1154     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1155 
1156     j = indx1 + indy1*nx + indz2*nx*ny;
1157     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1158     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1159 
1160     j = indx1 + indy2*nx + indz1*nx*ny;
1161     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1162     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1163 
1164     j = indx1 + indy2*nx + indz2*nx*ny;
1165     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1166     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1167 
1168     j = indx2 + indy1*nx + indz1*nx*ny;
1169     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1170     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1171 
1172     j = indx2 + indy1*nx + indz2*nx*ny;
1173     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1174     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1175 
1176     j = indx2 + indy2*nx + indz1*nx*ny;
1177     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1178     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1179 
1180     j = indx2 + indy2*nx + indz2*nx*ny;
1181     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1182     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1183   }
1184   PetscCall(MatAssemblyBegin(user->Qblock,MAT_FINAL_ASSEMBLY));
1185   PetscCall(MatAssemblyEnd(user->Qblock,MAT_FINAL_ASSEMBLY));
1186 
1187   PetscCall(MatTranspose(user->Qblock,MAT_INITIAL_MATRIX,&user->QblockT));
1188   PetscCall(MatTranspose(user->L,MAT_INITIAL_MATRIX,&user->LT));
1189 
1190   /* Add noise to the measurement data */
1191   PetscCall(VecSet(user->ywork,1.0));
1192   PetscCall(VecAYPX(user->ywork,user->noise,user->ytrue));
1193   PetscCall(Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
1194   for (j=0; j<user->ns; j++) {
1195     i = user->sample_times[j];
1196     PetscCall(MatMult(user->Qblock,user->yiwork[i],user->di[j]));
1197   }
1198   PetscCall(Gather_i(user->d,user->di,user->di_scatter,user->ns));
1199 
1200   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1201   PetscCall(KSPSetFromOptions(user->solver));
1202   PetscCall(PetscFree(x));
1203   PetscCall(PetscFree(y));
1204   PetscCall(PetscFree(z));
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 PetscErrorCode ParabolicDestroy(AppCtx *user)
1209 {
1210   PetscInt       i;
1211 
1212   PetscFunctionBegin;
1213   PetscCall(MatDestroy(&user->Qblock));
1214   PetscCall(MatDestroy(&user->QblockT));
1215   PetscCall(MatDestroy(&user->Div));
1216   PetscCall(MatDestroy(&user->Divwork));
1217   PetscCall(MatDestroy(&user->Grad));
1218   PetscCall(MatDestroy(&user->Av));
1219   PetscCall(MatDestroy(&user->Avwork));
1220   PetscCall(MatDestroy(&user->AvT));
1221   PetscCall(MatDestroy(&user->DSG));
1222   PetscCall(MatDestroy(&user->L));
1223   PetscCall(MatDestroy(&user->LT));
1224   PetscCall(KSPDestroy(&user->solver));
1225   PetscCall(MatDestroy(&user->Js));
1226   PetscCall(MatDestroy(&user->Jd));
1227   PetscCall(MatDestroy(&user->JsInv));
1228   PetscCall(MatDestroy(&user->JsBlock));
1229   PetscCall(MatDestroy(&user->JsBlockPrec));
1230   PetscCall(VecDestroy(&user->u));
1231   PetscCall(VecDestroy(&user->uwork));
1232   PetscCall(VecDestroy(&user->utrue));
1233   PetscCall(VecDestroy(&user->y));
1234   PetscCall(VecDestroy(&user->ywork));
1235   PetscCall(VecDestroy(&user->ytrue));
1236   PetscCall(VecDestroyVecs(user->nt,&user->yi));
1237   PetscCall(VecDestroyVecs(user->nt,&user->yiwork));
1238   PetscCall(VecDestroyVecs(user->ns,&user->di));
1239   PetscCall(PetscFree(user->yi));
1240   PetscCall(PetscFree(user->yiwork));
1241   PetscCall(PetscFree(user->di));
1242   PetscCall(VecDestroy(&user->c));
1243   PetscCall(VecDestroy(&user->cwork));
1244   PetscCall(VecDestroy(&user->ur));
1245   PetscCall(VecDestroy(&user->q));
1246   PetscCall(VecDestroy(&user->d));
1247   PetscCall(VecDestroy(&user->dwork));
1248   PetscCall(VecDestroy(&user->lwork));
1249   PetscCall(VecDestroy(&user->S));
1250   PetscCall(VecDestroy(&user->Swork));
1251   PetscCall(VecDestroy(&user->Av_u));
1252   PetscCall(VecDestroy(&user->Twork));
1253   PetscCall(VecDestroy(&user->Rwork));
1254   PetscCall(VecDestroy(&user->js_diag));
1255   PetscCall(ISDestroy(&user->s_is));
1256   PetscCall(ISDestroy(&user->d_is));
1257   PetscCall(VecScatterDestroy(&user->state_scatter));
1258   PetscCall(VecScatterDestroy(&user->design_scatter));
1259   for (i=0; i<user->nt; i++) {
1260     PetscCall(VecScatterDestroy(&user->yi_scatter[i]));
1261   }
1262   for (i=0; i<user->ns; i++) {
1263     PetscCall(VecScatterDestroy(&user->di_scatter[i]));
1264   }
1265   PetscCall(PetscFree(user->yi_scatter));
1266   PetscCall(PetscFree(user->di_scatter));
1267   PetscCall(PetscFree(user->sample_times));
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 PetscErrorCode ParabolicMonitor(Tao tao, void *ptr)
1272 {
1273   Vec            X;
1274   PetscReal      unorm,ynorm;
1275   AppCtx         *user = (AppCtx*)ptr;
1276 
1277   PetscFunctionBegin;
1278   PetscCall(TaoGetSolution(tao,&X));
1279   PetscCall(Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
1280   PetscCall(VecAXPY(user->ywork,-1.0,user->ytrue));
1281   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue));
1282   PetscCall(VecNorm(user->uwork,NORM_2,&unorm));
1283   PetscCall(VecNorm(user->ywork,NORM_2,&ynorm));
1284   PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm));
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 /*TEST
1289 
1290    build:
1291       requires: !complex
1292 
1293    test:
1294       args: -tao_cmonitor -tao_type lcl -ns 1 -tao_gatol 1.e-4 -ksp_max_it 30
1295       requires: !single
1296 
1297 TEST*/
1298