xref: /petsc/src/tao/pde_constrained/tutorials/elliptic.c (revision a4af0ceea8a251db97ee0dc5c0d52d4adf50264a)
1 #include <petsc/private/taoimpl.h>
2 
3 /*T
4    Concepts: TAO^Solving a system of nonlinear equations, nonlinear least squares
5    Routines: TaoCreate();
6    Routines: TaoSetType();
7    Routines: TaoSetInitialVector();
8    Routines: TaoSetObjectiveRoutine();
9    Routines: TaoSetGradientRoutine();
10    Routines: TaoSetConstraintsRoutine();
11    Routines: TaoSetJacobianStateRoutine();
12    Routines: TaoSetJacobianDesignRoutine();
13    Routines: TaoSetStateDesignIS();
14    Routines: TaoSetFromOptions();
15    Routines: TaoSolve();
16    Routines: TaoDestroy();
17    Processors: n
18 T*/
19 
20 typedef struct {
21   PetscInt n; /* Number of total variables */
22   PetscInt m; /* Number of constraints */
23   PetscInt nstate;
24   PetscInt ndesign;
25   PetscInt mx; /* grid points in each direction */
26   PetscInt ns; /* Number of data samples (1<=ns<=8)
27                   Currently only ns=1 is supported */
28   PetscInt ndata; /* Number of data points per sample */
29   IS       s_is;
30   IS       d_is;
31 
32   VecScatter state_scatter;
33   VecScatter design_scatter;
34   VecScatter *yi_scatter, *di_scatter;
35   Vec        suby,subq,subd;
36   Mat        Js,Jd,JsPrec,JsInv,JsBlock;
37 
38   PetscReal alpha; /* Regularization parameter */
39   PetscReal beta; /* Weight attributed to ||u||^2 in regularization functional */
40   PetscReal noise; /* Amount of noise to add to data */
41   PetscReal *ones;
42   Mat       Q;
43   Mat       MQ;
44   Mat       L;
45 
46   Mat Grad;
47   Mat Av,Avwork;
48   Mat Div, Divwork;
49   Mat DSG;
50   Mat Diag,Ones;
51 
52   Vec q;
53   Vec ur; /* reference */
54 
55   Vec d;
56   Vec dwork;
57 
58   Vec x; /* super vec of y,u */
59 
60   Vec y; /* state variables */
61   Vec ywork;
62 
63   Vec ytrue;
64 
65   Vec u; /* design variables */
66   Vec uwork;
67 
68   Vec utrue;
69 
70   Vec js_diag;
71 
72   Vec c; /* constraint vector */
73   Vec cwork;
74 
75   Vec lwork;
76   Vec S;
77   Vec Swork,Twork,Sdiag,Ywork;
78   Vec Av_u;
79 
80   KSP solver;
81   PC  prec;
82 
83   PetscReal tola,tolb,tolc,told;
84   PetscInt  ksp_its;
85   PetscInt  ksp_its_initial;
86   PetscLogStage stages[10];
87   PetscBool use_ptap;
88   PetscBool use_lrc;
89 } AppCtx;
90 
91 PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
92 PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
93 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
94 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
95 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat,void*);
96 PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
97 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
98 PetscErrorCode Gather(Vec, Vec, VecScatter, Vec, VecScatter);
99 PetscErrorCode Scatter(Vec, Vec, VecScatter, Vec, VecScatter);
100 PetscErrorCode EllipticInitialize(AppCtx*);
101 PetscErrorCode EllipticDestroy(AppCtx*);
102 PetscErrorCode EllipticMonitor(Tao, void*);
103 
104 PetscErrorCode StateBlockMatMult(Mat,Vec,Vec);
105 PetscErrorCode StateMatMult(Mat,Vec,Vec);
106 
107 PetscErrorCode StateInvMatMult(Mat,Vec,Vec);
108 PetscErrorCode DesignMatMult(Mat,Vec,Vec);
109 PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);
110 
111 PetscErrorCode QMatMult(Mat,Vec,Vec);
112 PetscErrorCode QMatMultTranspose(Mat,Vec,Vec);
113 
114 static  char help[]="";
115 
116 int main(int argc, char **argv)
117 {
118   PetscErrorCode     ierr;
119   Vec                x0;
120   Tao                tao;
121   AppCtx             user;
122   PetscInt           ntests = 1;
123   PetscInt           i;
124 
125   ierr = PetscInitialize(&argc, &argv, (char*)0,help);if (ierr) return ierr;
126   user.mx = 8;
127   ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"elliptic example",NULL);CHKERRQ(ierr);
128   ierr = PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL);CHKERRQ(ierr);
129   user.ns = 6;
130   ierr = PetscOptionsInt("-ns","Number of data samples (1<=ns<=8)","",user.ns,&user.ns,NULL);CHKERRQ(ierr);
131   user.ndata = 64;
132   ierr = PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL);CHKERRQ(ierr);
133   user.alpha = 0.1;
134   ierr = PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL);CHKERRQ(ierr);
135   user.beta = 0.00001;
136   ierr = PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,NULL);CHKERRQ(ierr);
137   user.noise = 0.01;
138   ierr = PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,NULL);CHKERRQ(ierr);
139 
140   user.use_ptap = PETSC_FALSE;
141   ierr = PetscOptionsBool("-use_ptap","Use ptap matrix for DSG","",user.use_ptap,&user.use_ptap,NULL);CHKERRQ(ierr);
142   user.use_lrc = PETSC_FALSE;
143   ierr = PetscOptionsBool("-use_lrc","Use lrc matrix for Js","",user.use_lrc,&user.use_lrc,NULL);CHKERRQ(ierr);
144   ierr = PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL);CHKERRQ(ierr);
145   ierr = PetscOptionsEnd();CHKERRQ(ierr);
146 
147   user.m = user.ns*user.mx*user.mx*user.mx; /* number of constraints */
148   user.nstate =  user.m;
149   user.ndesign = user.mx*user.mx*user.mx;
150   user.n = user.nstate + user.ndesign; /* number of variables */
151 
152   /* Create TAO solver and set desired solution method */
153   ierr = TaoCreate(PETSC_COMM_WORLD,&tao);CHKERRQ(ierr);
154   ierr = TaoSetType(tao,TAOLCL);CHKERRQ(ierr);
155 
156   /* Set up initial vectors and matrices */
157   ierr = EllipticInitialize(&user);CHKERRQ(ierr);
158 
159   ierr = Gather(user.x,user.y,user.state_scatter,user.u,user.design_scatter);CHKERRQ(ierr);
160   ierr = VecDuplicate(user.x,&x0);CHKERRQ(ierr);
161   ierr = VecCopy(user.x,x0);CHKERRQ(ierr);
162 
163   /* Set solution vector with an initial guess */
164   ierr = TaoSetInitialVector(tao,user.x);CHKERRQ(ierr);
165   ierr = TaoSetObjectiveRoutine(tao, FormFunction, &user);CHKERRQ(ierr);
166   ierr = TaoSetGradientRoutine(tao, FormGradient, &user);CHKERRQ(ierr);
167   ierr = TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user);CHKERRQ(ierr);
168 
169   ierr = TaoSetJacobianStateRoutine(tao, user.Js, NULL, user.JsInv, FormJacobianState, &user);CHKERRQ(ierr);
170   ierr = TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user);CHKERRQ(ierr);
171 
172   ierr = TaoSetStateDesignIS(tao,user.s_is,user.d_is);CHKERRQ(ierr);
173   ierr = TaoSetFromOptions(tao);CHKERRQ(ierr);
174 
175   /* SOLVE THE APPLICATION */
176   ierr = PetscLogStageRegister("Trials",&user.stages[1]);CHKERRQ(ierr);
177   ierr = PetscLogStagePush(user.stages[1]);CHKERRQ(ierr);
178   for (i=0; i<ntests; i++) {
179     ierr = TaoSolve(tao);CHKERRQ(ierr);
180     ierr = PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its);CHKERRQ(ierr);
181     ierr = VecCopy(x0,user.x);CHKERRQ(ierr);
182   }
183   ierr = PetscLogStagePop();CHKERRQ(ierr);
184   ierr = PetscBarrier((PetscObject)user.x);CHKERRQ(ierr);
185   ierr = PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ");CHKERRQ(ierr);
186   ierr = PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial);CHKERRQ(ierr);
187 
188   ierr = TaoDestroy(&tao);CHKERRQ(ierr);
189   ierr = VecDestroy(&x0);CHKERRQ(ierr);
190   ierr = EllipticDestroy(&user);CHKERRQ(ierr);
191   ierr = PetscFinalize();
192   return ierr;
193 }
194 /* ------------------------------------------------------------------- */
195 /*
196    dwork = Qy - d
197    lwork = L*(u-ur)
198    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
199 */
200 PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
201 {
202   PetscErrorCode ierr;
203   PetscReal      d1=0,d2=0;
204   AppCtx         *user = (AppCtx*)ptr;
205 
206   PetscFunctionBegin;
207   ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
208   ierr = MatMult(user->MQ,user->y,user->dwork);CHKERRQ(ierr);
209   ierr = VecAXPY(user->dwork,-1.0,user->d);CHKERRQ(ierr);
210   ierr = VecDot(user->dwork,user->dwork,&d1);CHKERRQ(ierr);
211   ierr = VecWAXPY(user->uwork,-1.0,user->ur,user->u);CHKERRQ(ierr);
212   ierr = MatMult(user->L,user->uwork,user->lwork);CHKERRQ(ierr);
213   ierr = VecDot(user->lwork,user->lwork,&d2);CHKERRQ(ierr);
214   *f = 0.5 * (d1 + user->alpha*d2);
215   PetscFunctionReturn(0);
216 }
217 
218 /* ------------------------------------------------------------------- */
219 /*
220     state: g_s = Q' *(Qy - d)
221     design: g_d = alpha*L'*L*(u-ur)
222 */
223 PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
224 {
225   PetscErrorCode ierr;
226   AppCtx         *user = (AppCtx*)ptr;
227 
228   PetscFunctionBegin;
229   ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
230   ierr = MatMult(user->MQ,user->y,user->dwork);CHKERRQ(ierr);
231   ierr = VecAXPY(user->dwork,-1.0,user->d);CHKERRQ(ierr);
232   ierr = MatMultTranspose(user->MQ,user->dwork,user->ywork);CHKERRQ(ierr);
233   ierr = VecWAXPY(user->uwork,-1.0,user->ur,user->u);CHKERRQ(ierr);
234   ierr = MatMult(user->L,user->uwork,user->lwork);CHKERRQ(ierr);
235   ierr = MatMultTranspose(user->L,user->lwork,user->uwork);CHKERRQ(ierr);
236   ierr = VecScale(user->uwork, user->alpha);CHKERRQ(ierr);
237   ierr = Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);CHKERRQ(ierr);
238   PetscFunctionReturn(0);
239 }
240 
241 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
242 {
243   PetscErrorCode ierr;
244   PetscReal      d1,d2;
245   AppCtx         *user = (AppCtx*)ptr;
246 
247   PetscFunctionBegin;
248   ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
249   ierr = MatMult(user->MQ,user->y,user->dwork);CHKERRQ(ierr);
250   ierr = VecAXPY(user->dwork,-1.0,user->d);CHKERRQ(ierr);
251   ierr = VecDot(user->dwork,user->dwork,&d1);CHKERRQ(ierr);
252   ierr = MatMultTranspose(user->MQ,user->dwork,user->ywork);CHKERRQ(ierr);
253 
254   ierr = VecWAXPY(user->uwork,-1.0,user->ur,user->u);CHKERRQ(ierr);
255   ierr = MatMult(user->L,user->uwork,user->lwork);CHKERRQ(ierr);
256   ierr = VecDot(user->lwork,user->lwork,&d2);CHKERRQ(ierr);
257   ierr = MatMultTranspose(user->L,user->lwork,user->uwork);CHKERRQ(ierr);
258   ierr = VecScale(user->uwork, user->alpha);CHKERRQ(ierr);
259   *f = 0.5 * (d1 + user->alpha*d2);
260   ierr = Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 /* ------------------------------------------------------------------- */
265 /* A
266 MatShell object
267 */
268 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
269 {
270   PetscErrorCode ierr;
271   AppCtx         *user = (AppCtx*)ptr;
272 
273   PetscFunctionBegin;
274   ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
275   /* DSG = Div * (1/Av_u) * Grad */
276   ierr = VecSet(user->uwork,0);CHKERRQ(ierr);
277   ierr = VecAXPY(user->uwork,-1.0,user->u);CHKERRQ(ierr);
278   ierr = VecExp(user->uwork);CHKERRQ(ierr);
279   ierr = MatMult(user->Av,user->uwork,user->Av_u);CHKERRQ(ierr);
280   ierr = VecCopy(user->Av_u,user->Swork);CHKERRQ(ierr);
281   ierr = VecReciprocal(user->Swork);CHKERRQ(ierr);
282   if (user->use_ptap) {
283     ierr = MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES);CHKERRQ(ierr);
284     ierr = MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG);CHKERRQ(ierr);
285   } else {
286     ierr = MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
287     ierr = MatDiagonalScale(user->Divwork,NULL,user->Swork);CHKERRQ(ierr);
288     ierr = MatProductNumeric(user->DSG);CHKERRQ(ierr);
289   }
290   PetscFunctionReturn(0);
291 }
292 /* ------------------------------------------------------------------- */
293 /* B */
294 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
295 {
296   PetscErrorCode ierr;
297   AppCtx         *user = (AppCtx*)ptr;
298 
299   PetscFunctionBegin;
300   ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
301   PetscFunctionReturn(0);
302 }
303 
304 PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y)
305 {
306   PetscErrorCode ierr;
307   PetscReal      sum;
308   AppCtx         *user;
309 
310   PetscFunctionBegin;
311   ierr = MatShellGetContext(J_shell,&user);CHKERRQ(ierr);
312   ierr = MatMult(user->DSG,X,Y);CHKERRQ(ierr);
313   ierr = VecSum(X,&sum);CHKERRQ(ierr);
314   sum /= user->ndesign;
315   ierr = VecShift(Y,sum);CHKERRQ(ierr);
316   PetscFunctionReturn(0);
317 }
318 
319 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
320 {
321   PetscErrorCode ierr;
322   PetscInt       i;
323   AppCtx         *user;
324 
325   PetscFunctionBegin;
326   ierr = MatShellGetContext(J_shell,&user);CHKERRQ(ierr);
327   if (user->ns == 1) {
328     ierr = MatMult(user->JsBlock,X,Y);CHKERRQ(ierr);
329   } else {
330     for (i=0;i<user->ns;i++) {
331       ierr = Scatter(X,user->subq,user->yi_scatter[i],0,0);CHKERRQ(ierr);
332       ierr = Scatter(Y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
333       ierr = MatMult(user->JsBlock,user->subq,user->suby);CHKERRQ(ierr);
334       ierr = Gather(Y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
335     }
336   }
337   PetscFunctionReturn(0);
338 }
339 
340 PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y)
341 {
342   PetscErrorCode ierr;
343   PetscInt       its,i;
344   AppCtx         *user;
345 
346   PetscFunctionBegin;
347   ierr = MatShellGetContext(J_shell,&user);CHKERRQ(ierr);
348   ierr = KSPSetOperators(user->solver,user->JsBlock,user->DSG);CHKERRQ(ierr);
349   if (Y == user->ytrue) {
350     /* First solve is done using true solution to set up problem */
351     ierr = KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);CHKERRQ(ierr);
352   } else {
353     ierr = KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);CHKERRQ(ierr);
354   }
355   if (user->ns == 1) {
356     ierr = KSPSolve(user->solver,X,Y);CHKERRQ(ierr);
357     ierr = KSPGetIterationNumber(user->solver,&its);CHKERRQ(ierr);
358     user->ksp_its+=its;
359   } else {
360     for (i=0;i<user->ns;i++) {
361       ierr = Scatter(X,user->subq,user->yi_scatter[i],0,0);CHKERRQ(ierr);
362       ierr = Scatter(Y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
363       ierr = KSPSolve(user->solver,user->subq,user->suby);CHKERRQ(ierr);
364       ierr = KSPGetIterationNumber(user->solver,&its);CHKERRQ(ierr);
365       user->ksp_its+=its;
366       ierr = Gather(Y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
367     }
368   }
369   PetscFunctionReturn(0);
370 }
371 PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y)
372 {
373   PetscErrorCode ierr;
374   AppCtx         *user;
375   PetscInt       i;
376 
377   PetscFunctionBegin;
378   ierr = MatShellGetContext(J_shell,&user);CHKERRQ(ierr);
379   if (user->ns == 1) {
380     ierr = MatMult(user->Q,X,Y);CHKERRQ(ierr);
381   } else {
382     for (i=0;i<user->ns;i++) {
383       ierr = Scatter(X,user->subq,user->yi_scatter[i],0,0);CHKERRQ(ierr);
384       ierr = Scatter(Y,user->subd,user->di_scatter[i],0,0);CHKERRQ(ierr);
385       ierr = MatMult(user->Q,user->subq,user->subd);CHKERRQ(ierr);
386       ierr = Gather(Y,user->subd,user->di_scatter[i],0,0);CHKERRQ(ierr);
387     }
388   }
389   PetscFunctionReturn(0);
390 }
391 
392 PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y)
393 {
394   PetscErrorCode ierr;
395   AppCtx         *user;
396   PetscInt       i;
397 
398   PetscFunctionBegin;
399   ierr = MatShellGetContext(J_shell,&user);CHKERRQ(ierr);
400   if (user->ns == 1) {
401     ierr = MatMultTranspose(user->Q,X,Y);CHKERRQ(ierr);
402   } else {
403     for (i=0;i<user->ns;i++) {
404       ierr = Scatter(X,user->subd,user->di_scatter[i],0,0);CHKERRQ(ierr);
405       ierr = Scatter(Y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
406       ierr = MatMultTranspose(user->Q,user->subd,user->suby);CHKERRQ(ierr);
407       ierr = Gather(Y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
408     }
409   }
410   PetscFunctionReturn(0);
411 }
412 
413 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
414 {
415   PetscErrorCode ierr;
416   PetscInt       i;
417   AppCtx         *user;
418 
419   PetscFunctionBegin;
420   ierr = MatShellGetContext(J_shell,&user);CHKERRQ(ierr);
421 
422   /* sdiag(1./v) */
423   ierr = VecSet(user->uwork,0);CHKERRQ(ierr);
424   ierr = VecAXPY(user->uwork,-1.0,user->u);CHKERRQ(ierr);
425   ierr = VecExp(user->uwork);CHKERRQ(ierr);
426 
427   /* sdiag(1./((Av*(1./v)).^2)) */
428   ierr = MatMult(user->Av,user->uwork,user->Swork);CHKERRQ(ierr);
429   ierr = VecPointwiseMult(user->Swork,user->Swork,user->Swork);CHKERRQ(ierr);
430   ierr = VecReciprocal(user->Swork);CHKERRQ(ierr);
431 
432   /* (Av * (sdiag(1./v) * b)) */
433   ierr = VecPointwiseMult(user->uwork,user->uwork,X);CHKERRQ(ierr);
434   ierr = MatMult(user->Av,user->uwork,user->Twork);CHKERRQ(ierr);
435 
436   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
437   ierr = VecPointwiseMult(user->Swork,user->Twork,user->Swork);CHKERRQ(ierr);
438 
439   if (user->ns == 1) {
440     /* (sdiag(Grad*y(:,i)) */
441     ierr = MatMult(user->Grad,user->y,user->Twork);CHKERRQ(ierr);
442 
443     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
444     ierr = VecPointwiseMult(user->Swork,user->Twork,user->Swork);CHKERRQ(ierr);
445     ierr = MatMultTranspose(user->Grad,user->Swork,Y);CHKERRQ(ierr);
446   } else {
447     for (i=0;i<user->ns;i++) {
448       ierr = Scatter(user->y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
449       ierr = Scatter(Y,user->subq,user->yi_scatter[i],0,0);CHKERRQ(ierr);
450 
451       ierr = MatMult(user->Grad,user->suby,user->Twork);CHKERRQ(ierr);
452       ierr = VecPointwiseMult(user->Twork,user->Twork,user->Swork);CHKERRQ(ierr);
453       ierr = MatMultTranspose(user->Grad,user->Twork,user->subq);CHKERRQ(ierr);
454       ierr = Gather(user->y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
455       ierr = Gather(Y,user->subq,user->yi_scatter[i],0,0);CHKERRQ(ierr);
456     }
457   }
458   PetscFunctionReturn(0);
459 }
460 
461 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
462 {
463   PetscErrorCode ierr;
464   PetscInt       i;
465   AppCtx         *user;
466 
467   PetscFunctionBegin;
468   ierr = MatShellGetContext(J_shell,&user);CHKERRQ(ierr);
469   ierr = VecZeroEntries(Y);CHKERRQ(ierr);
470 
471   /* Sdiag = 1./((Av*(1./v)).^2) */
472   ierr = VecSet(user->uwork,0);CHKERRQ(ierr);
473   ierr = VecAXPY(user->uwork,-1.0,user->u);CHKERRQ(ierr);
474   ierr = VecExp(user->uwork);CHKERRQ(ierr);
475   ierr = MatMult(user->Av,user->uwork,user->Swork);CHKERRQ(ierr);
476   ierr = VecPointwiseMult(user->Sdiag,user->Swork,user->Swork);CHKERRQ(ierr);
477   ierr = VecReciprocal(user->Sdiag);CHKERRQ(ierr);
478 
479   for (i=0;i<user->ns;i++) {
480     ierr = Scatter(X,user->subq,user->yi_scatter[i],0,0);CHKERRQ(ierr);
481     ierr = Scatter(user->y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
482 
483     /* Swork = (Div' * b(:,i)) */
484     ierr = MatMult(user->Grad,user->subq,user->Swork);CHKERRQ(ierr);
485 
486     /* Twork = Grad*y(:,i) */
487     ierr = MatMult(user->Grad,user->suby,user->Twork);CHKERRQ(ierr);
488 
489     /* Twork = sdiag(Twork) * Swork */
490     ierr = VecPointwiseMult(user->Twork,user->Swork,user->Twork);CHKERRQ(ierr);
491 
492     /* Swork = pointwisemult(Sdiag,Twork) */
493     ierr = VecPointwiseMult(user->Swork,user->Twork,user->Sdiag);CHKERRQ(ierr);
494 
495     /* Ywork = Av' * Swork */
496     ierr = MatMultTranspose(user->Av,user->Swork,user->Ywork);CHKERRQ(ierr);
497 
498     /* Ywork = pointwisemult(uwork,Ywork) */
499     ierr = VecPointwiseMult(user->Ywork,user->uwork,user->Ywork);CHKERRQ(ierr);
500     ierr = VecAXPY(Y,1.0,user->Ywork);CHKERRQ(ierr);
501     ierr = Gather(user->y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
502   }
503   PetscFunctionReturn(0);
504 }
505 
506 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
507 {
508    /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
509    PetscErrorCode ierr;
510    PetscReal      sum;
511    PetscInt       i;
512    AppCtx         *user = (AppCtx*)ptr;
513 
514    PetscFunctionBegin;
515    ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
516    if (user->ns == 1) {
517      ierr = MatMult(user->Grad,user->y,user->Swork);CHKERRQ(ierr);
518      ierr = VecPointwiseDivide(user->Swork,user->Swork,user->Av_u);CHKERRQ(ierr);
519      ierr = MatMultTranspose(user->Grad,user->Swork,C);CHKERRQ(ierr);
520      ierr = VecSum(user->y,&sum);CHKERRQ(ierr);
521      sum /= user->ndesign;
522      ierr = VecShift(C,sum);CHKERRQ(ierr);
523    } else {
524      for (i=0;i<user->ns;i++) {
525       ierr = Scatter(user->y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
526       ierr = Scatter(C,user->subq,user->yi_scatter[i],0,0);CHKERRQ(ierr);
527       ierr = MatMult(user->Grad,user->suby,user->Swork);CHKERRQ(ierr);
528       ierr = VecPointwiseDivide(user->Swork,user->Swork,user->Av_u);CHKERRQ(ierr);
529       ierr = MatMultTranspose(user->Grad,user->Swork,user->subq);CHKERRQ(ierr);
530 
531       ierr = VecSum(user->suby,&sum);CHKERRQ(ierr);
532       sum /= user->ndesign;
533       ierr = VecShift(user->subq,sum);CHKERRQ(ierr);
534 
535       ierr = Gather(user->y,user->suby,user->yi_scatter[i],0,0);CHKERRQ(ierr);
536       ierr = Gather(C,user->subq,user->yi_scatter[i],0,0);CHKERRQ(ierr);
537      }
538    }
539    ierr = VecAXPY(C,-1.0,user->q);CHKERRQ(ierr);
540    PetscFunctionReturn(0);
541 }
542 
543 PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
544 {
545   PetscErrorCode ierr;
546 
547   PetscFunctionBegin;
548   ierr = VecScatterBegin(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
549   ierr = VecScatterEnd(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
550   if (sub2) {
551     ierr = VecScatterBegin(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
552     ierr = VecScatterEnd(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
553   }
554   PetscFunctionReturn(0);
555 }
556 
557 PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
558 {
559   PetscErrorCode ierr;
560 
561   PetscFunctionBegin;
562   ierr = VecScatterBegin(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
563   ierr = VecScatterEnd(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
564   if (sub2) {
565     ierr = VecScatterBegin(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
566     ierr = VecScatterEnd(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
567   }
568   PetscFunctionReturn(0);
569 }
570 
571 PetscErrorCode EllipticInitialize(AppCtx *user)
572 {
573   PetscErrorCode ierr;
574   PetscInt       m,n,i,j,k,l,linear_index,is,js,ks,ls,istart,iend,iblock;
575   Vec            XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork;
576   PetscReal      *x,*y,*z;
577   PetscReal      h,meanut;
578   PetscScalar    hinv,neg_hinv,half = 0.5,sqrt_beta;
579   PetscInt       im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
580   IS             is_alldesign,is_allstate;
581   IS             is_from_d;
582   IS             is_from_y;
583   PetscInt       lo,hi,hi2,lo2,ysubnlocal,dsubnlocal;
584   const PetscInt *ranges, *subranges;
585   PetscMPIInt    size;
586   PetscReal      xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
587   PetscScalar    v,vx,vy,vz;
588   PetscInt       offset,subindex,subvec,nrank,kk;
589 
590   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,
591                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,
592                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,
593                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,
594                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,
595                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,
596                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,
597                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};
598 
599   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,
600                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,
601                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,
602                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,
603                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,
604                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,
605                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,
606                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};
607 
608   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,
609                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,
610                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,
611                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,
612                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,
613                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,
614                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,
615                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};
616 
617   PetscFunctionBegin;
618   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRMPI(ierr);
619   ierr = PetscLogStageRegister("Elliptic Setup",&user->stages[0]);CHKERRQ(ierr);
620   ierr = PetscLogStagePush(user->stages[0]);CHKERRQ(ierr);
621 
622   /* Create u,y,c,x */
623   ierr = VecCreate(PETSC_COMM_WORLD,&user->u);CHKERRQ(ierr);
624   ierr = VecCreate(PETSC_COMM_WORLD,&user->y);CHKERRQ(ierr);
625   ierr = VecCreate(PETSC_COMM_WORLD,&user->c);CHKERRQ(ierr);
626   ierr = VecSetSizes(user->u,PETSC_DECIDE,user->ndesign);CHKERRQ(ierr);
627   ierr = VecSetFromOptions(user->u);CHKERRQ(ierr);
628   ierr = VecGetLocalSize(user->u,&ysubnlocal);CHKERRQ(ierr);
629   ierr = VecSetSizes(user->y,ysubnlocal*user->ns,user->nstate);CHKERRQ(ierr);
630   ierr = VecSetSizes(user->c,ysubnlocal*user->ns,user->m);CHKERRQ(ierr);
631   ierr = VecSetFromOptions(user->y);CHKERRQ(ierr);
632   ierr = VecSetFromOptions(user->c);CHKERRQ(ierr);
633 
634   /*
635      *******************************
636      Create scatters for x <-> y,u
637      *******************************
638 
639      If the state vector y and design vector u are partitioned as
640      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
641      then the solution vector x is organized as
642      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
643      The index sets user->s_is and user->d_is correspond to the indices of the
644      state and design variables owned by the current processor.
645   */
646   ierr = VecCreate(PETSC_COMM_WORLD,&user->x);CHKERRQ(ierr);
647 
648   ierr = VecGetOwnershipRange(user->y,&lo,&hi);CHKERRQ(ierr);
649   ierr = VecGetOwnershipRange(user->u,&lo2,&hi2);CHKERRQ(ierr);
650 
651   ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate);CHKERRQ(ierr);
652   ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user->s_is);CHKERRQ(ierr);
653   ierr = ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign);CHKERRQ(ierr);
654   ierr = ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user->d_is);CHKERRQ(ierr);
655 
656   ierr = VecSetSizes(user->x,hi-lo+hi2-lo2,user->n);CHKERRQ(ierr);
657   ierr = VecSetFromOptions(user->x);CHKERRQ(ierr);
658 
659   ierr = VecScatterCreate(user->x,user->s_is,user->y,is_allstate,&user->state_scatter);CHKERRQ(ierr);
660   ierr = VecScatterCreate(user->x,user->d_is,user->u,is_alldesign,&user->design_scatter);CHKERRQ(ierr);
661   ierr = ISDestroy(&is_alldesign);CHKERRQ(ierr);
662   ierr = ISDestroy(&is_allstate);CHKERRQ(ierr);
663   /*
664      *******************************
665      Create scatter from y to y_1,y_2,...,y_ns
666      *******************************
667   */
668   ierr = PetscMalloc1(user->ns,&user->yi_scatter);CHKERRQ(ierr);
669   ierr = VecDuplicate(user->u,&user->suby);CHKERRQ(ierr);
670   ierr = VecDuplicate(user->u,&user->subq);CHKERRQ(ierr);
671 
672   ierr = VecGetOwnershipRange(user->y,&lo2,&hi2);CHKERRQ(ierr);
673   istart = 0;
674   for (i=0; i<user->ns; i++) {
675     ierr = VecGetOwnershipRange(user->suby,&lo,&hi);CHKERRQ(ierr);
676     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y);CHKERRQ(ierr);
677     ierr = VecScatterCreate(user->y,is_from_y,user->suby,NULL,&user->yi_scatter[i]);CHKERRQ(ierr);
678     istart = istart + hi-lo;
679     ierr = ISDestroy(&is_from_y);CHKERRQ(ierr);
680   }
681   /*
682      *******************************
683      Create scatter from d to d_1,d_2,...,d_ns
684      *******************************
685   */
686   ierr = VecCreate(PETSC_COMM_WORLD,&user->subd);CHKERRQ(ierr);
687   ierr = VecSetSizes(user->subd,PETSC_DECIDE,user->ndata);CHKERRQ(ierr);
688   ierr = VecSetFromOptions(user->subd);CHKERRQ(ierr);
689   ierr = VecCreate(PETSC_COMM_WORLD,&user->d);CHKERRQ(ierr);
690   ierr = VecGetLocalSize(user->subd,&dsubnlocal);CHKERRQ(ierr);
691   ierr = VecSetSizes(user->d,dsubnlocal*user->ns,user->ndata*user->ns);CHKERRQ(ierr);
692   ierr = VecSetFromOptions(user->d);CHKERRQ(ierr);
693   ierr = PetscMalloc1(user->ns,&user->di_scatter);CHKERRQ(ierr);
694 
695   ierr = VecGetOwnershipRange(user->d,&lo2,&hi2);CHKERRQ(ierr);
696   istart = 0;
697   for (i=0; i<user->ns; i++) {
698     ierr = VecGetOwnershipRange(user->subd,&lo,&hi);CHKERRQ(ierr);
699     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d);CHKERRQ(ierr);
700     ierr = VecScatterCreate(user->d,is_from_d,user->subd,NULL,&user->di_scatter[i]);CHKERRQ(ierr);
701     istart = istart + hi-lo;
702     ierr = ISDestroy(&is_from_d);CHKERRQ(ierr);
703   }
704 
705   ierr = PetscMalloc1(user->mx,&x);CHKERRQ(ierr);
706   ierr = PetscMalloc1(user->mx,&y);CHKERRQ(ierr);
707   ierr = PetscMalloc1(user->mx,&z);CHKERRQ(ierr);
708 
709   user->ksp_its = 0;
710   user->ksp_its_initial = 0;
711 
712   n = user->mx * user->mx * user->mx;
713   m = 3 * user->mx * user->mx * (user->mx-1);
714   sqrt_beta = PetscSqrtScalar(user->beta);
715 
716   ierr = VecCreate(PETSC_COMM_WORLD,&XX);CHKERRQ(ierr);
717   ierr = VecCreate(PETSC_COMM_WORLD,&user->q);CHKERRQ(ierr);
718   ierr = VecSetSizes(XX,ysubnlocal,n);CHKERRQ(ierr);
719   ierr = VecSetSizes(user->q,ysubnlocal*user->ns,user->m);CHKERRQ(ierr);
720   ierr = VecSetFromOptions(XX);CHKERRQ(ierr);
721   ierr = VecSetFromOptions(user->q);CHKERRQ(ierr);
722 
723   ierr = VecDuplicate(XX,&YY);CHKERRQ(ierr);
724   ierr = VecDuplicate(XX,&ZZ);CHKERRQ(ierr);
725   ierr = VecDuplicate(XX,&XXwork);CHKERRQ(ierr);
726   ierr = VecDuplicate(XX,&YYwork);CHKERRQ(ierr);
727   ierr = VecDuplicate(XX,&ZZwork);CHKERRQ(ierr);
728   ierr = VecDuplicate(XX,&UTwork);CHKERRQ(ierr);
729   ierr = VecDuplicate(XX,&user->utrue);CHKERRQ(ierr);
730 
731   /* map for striding q */
732   ierr = VecGetOwnershipRanges(user->q,&ranges);CHKERRQ(ierr);
733   ierr = VecGetOwnershipRanges(user->u,&subranges);CHKERRQ(ierr);
734 
735   ierr = VecGetOwnershipRange(user->q,&lo2,&hi2);CHKERRQ(ierr);
736   ierr = VecGetOwnershipRange(user->u,&lo,&hi);CHKERRQ(ierr);
737   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
738   h = 1.0/user->mx;
739   hinv = user->mx;
740   neg_hinv = -hinv;
741 
742   ierr = VecGetOwnershipRange(XX,&istart,&iend);CHKERRQ(ierr);
743   for (linear_index=istart; linear_index<iend; linear_index++) {
744     i = linear_index % user->mx;
745     j = ((linear_index-i)/user->mx) % user->mx;
746     k = ((linear_index-i)/user->mx-j) / user->mx;
747     vx = h*(i+0.5);
748     vy = h*(j+0.5);
749     vz = h*(k+0.5);
750     ierr = VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES);CHKERRQ(ierr);
751     ierr = VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES);CHKERRQ(ierr);
752     ierr = VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES);CHKERRQ(ierr);
753     for (is=0; is<2; is++) {
754       for (js=0; js<2; js++) {
755         for (ks=0; ks<2; ks++) {
756           ls = is*4 + js*2 + ks;
757           if (ls<user->ns) {
758             l =ls*n + linear_index;
759             /* remap */
760             subindex = l%n;
761             subvec = l/n;
762             nrank=0;
763             while (subindex >= subranges[nrank+1]) nrank++;
764             offset = subindex - subranges[nrank];
765             istart=0;
766             for (kk=0;kk<nrank;kk++) istart+=user->ns*(subranges[kk+1]-subranges[kk]);
767             istart += (subranges[nrank+1]-subranges[nrank])*subvec;
768             l = istart+offset;
769             v = 100*PetscSinScalar(2*PETSC_PI*(vx+0.25*is))*PetscSinScalar(2*PETSC_PI*(vy+0.25*js))*PetscSinScalar(2*PETSC_PI*(vz+0.25*ks));
770             ierr = VecSetValues(user->q,1,&l,&v,INSERT_VALUES);CHKERRQ(ierr);
771           }
772         }
773       }
774     }
775   }
776 
777   ierr = VecAssemblyBegin(XX);CHKERRQ(ierr);
778   ierr = VecAssemblyEnd(XX);CHKERRQ(ierr);
779   ierr = VecAssemblyBegin(YY);CHKERRQ(ierr);
780   ierr = VecAssemblyEnd(YY);CHKERRQ(ierr);
781   ierr = VecAssemblyBegin(ZZ);CHKERRQ(ierr);
782   ierr = VecAssemblyEnd(ZZ);CHKERRQ(ierr);
783   ierr = VecAssemblyBegin(user->q);CHKERRQ(ierr);
784   ierr = VecAssemblyEnd(user->q);CHKERRQ(ierr);
785 
786   /* Compute true parameter function
787      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
788   ierr = VecCopy(XX,XXwork);CHKERRQ(ierr);
789   ierr = VecCopy(YY,YYwork);CHKERRQ(ierr);
790   ierr = VecCopy(ZZ,ZZwork);CHKERRQ(ierr);
791 
792   ierr = VecShift(XXwork,-0.25);CHKERRQ(ierr);
793   ierr = VecShift(YYwork,-0.25);CHKERRQ(ierr);
794   ierr = VecShift(ZZwork,-0.25);CHKERRQ(ierr);
795 
796   ierr = VecPointwiseMult(XXwork,XXwork,XXwork);CHKERRQ(ierr);
797   ierr = VecPointwiseMult(YYwork,YYwork,YYwork);CHKERRQ(ierr);
798   ierr = VecPointwiseMult(ZZwork,ZZwork,ZZwork);CHKERRQ(ierr);
799 
800   ierr = VecCopy(XXwork,UTwork);CHKERRQ(ierr);
801   ierr = VecAXPY(UTwork,1.0,YYwork);CHKERRQ(ierr);
802   ierr = VecAXPY(UTwork,1.0,ZZwork);CHKERRQ(ierr);
803   ierr = VecScale(UTwork,-20.0);CHKERRQ(ierr);
804   ierr = VecExp(UTwork);CHKERRQ(ierr);
805   ierr = VecCopy(UTwork,user->utrue);CHKERRQ(ierr);
806 
807   ierr = VecCopy(XX,XXwork);CHKERRQ(ierr);
808   ierr = VecCopy(YY,YYwork);CHKERRQ(ierr);
809   ierr = VecCopy(ZZ,ZZwork);CHKERRQ(ierr);
810 
811   ierr = VecShift(XXwork,-0.75);CHKERRQ(ierr);
812   ierr = VecShift(YYwork,-0.75);CHKERRQ(ierr);
813   ierr = VecShift(ZZwork,-0.75);CHKERRQ(ierr);
814 
815   ierr = VecPointwiseMult(XXwork,XXwork,XXwork);CHKERRQ(ierr);
816   ierr = VecPointwiseMult(YYwork,YYwork,YYwork);CHKERRQ(ierr);
817   ierr = VecPointwiseMult(ZZwork,ZZwork,ZZwork);CHKERRQ(ierr);
818 
819   ierr = VecCopy(XXwork,UTwork);CHKERRQ(ierr);
820   ierr = VecAXPY(UTwork,1.0,YYwork);CHKERRQ(ierr);
821   ierr = VecAXPY(UTwork,1.0,ZZwork);CHKERRQ(ierr);
822   ierr = VecScale(UTwork,-20.0);CHKERRQ(ierr);
823   ierr = VecExp(UTwork);CHKERRQ(ierr);
824 
825   ierr = VecAXPY(user->utrue,-1.0,UTwork);CHKERRQ(ierr);
826 
827   ierr = VecDestroy(&XX);CHKERRQ(ierr);
828   ierr = VecDestroy(&YY);CHKERRQ(ierr);
829   ierr = VecDestroy(&ZZ);CHKERRQ(ierr);
830   ierr = VecDestroy(&XXwork);CHKERRQ(ierr);
831   ierr = VecDestroy(&YYwork);CHKERRQ(ierr);
832   ierr = VecDestroy(&ZZwork);CHKERRQ(ierr);
833   ierr = VecDestroy(&UTwork);CHKERRQ(ierr);
834 
835   /* Initial guess and reference model */
836   ierr = VecDuplicate(user->utrue,&user->ur);CHKERRQ(ierr);
837   ierr = VecSum(user->utrue,&meanut);CHKERRQ(ierr);
838   meanut = meanut / n;
839   ierr = VecSet(user->ur,meanut);CHKERRQ(ierr);
840   ierr = VecCopy(user->ur,user->u);CHKERRQ(ierr);
841 
842   /* Generate Grad matrix */
843   ierr = MatCreate(PETSC_COMM_WORLD,&user->Grad);CHKERRQ(ierr);
844   ierr = MatSetSizes(user->Grad,PETSC_DECIDE,ysubnlocal,m,n);CHKERRQ(ierr);
845   ierr = MatSetFromOptions(user->Grad);CHKERRQ(ierr);
846   ierr = MatMPIAIJSetPreallocation(user->Grad,2,NULL,2,NULL);CHKERRQ(ierr);
847   ierr = MatSeqAIJSetPreallocation(user->Grad,2,NULL);CHKERRQ(ierr);
848   ierr = MatGetOwnershipRange(user->Grad,&istart,&iend);CHKERRQ(ierr);
849 
850   for (i=istart; i<iend; i++) {
851     if (i<m/3) {
852       iblock = i / (user->mx-1);
853       j = iblock*user->mx + (i % (user->mx-1));
854       ierr = MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES);CHKERRQ(ierr);
855       j = j+1;
856       ierr = MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES);CHKERRQ(ierr);
857     }
858     if (i>=m/3 && i<2*m/3) {
859       iblock = (i-m/3) / (user->mx*(user->mx-1));
860       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
861       ierr = MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES);CHKERRQ(ierr);
862       j = j + user->mx;
863       ierr = MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES);CHKERRQ(ierr);
864     }
865     if (i>=2*m/3) {
866       j = i-2*m/3;
867       ierr = MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES);CHKERRQ(ierr);
868       j = j + user->mx*user->mx;
869       ierr = MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES);CHKERRQ(ierr);
870     }
871   }
872 
873   ierr = MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
874   ierr = MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
875 
876   /* Generate arithmetic averaging matrix Av */
877   ierr = MatCreate(PETSC_COMM_WORLD,&user->Av);CHKERRQ(ierr);
878   ierr = MatSetSizes(user->Av,PETSC_DECIDE,ysubnlocal,m,n);CHKERRQ(ierr);
879   ierr = MatSetFromOptions(user->Av);CHKERRQ(ierr);
880   ierr = MatMPIAIJSetPreallocation(user->Av,2,NULL,2,NULL);CHKERRQ(ierr);
881   ierr = MatSeqAIJSetPreallocation(user->Av,2,NULL);CHKERRQ(ierr);
882   ierr = MatGetOwnershipRange(user->Av,&istart,&iend);CHKERRQ(ierr);
883 
884   for (i=istart; i<iend; i++) {
885     if (i<m/3) {
886       iblock = i / (user->mx-1);
887       j = iblock*user->mx + (i % (user->mx-1));
888       ierr = MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);CHKERRQ(ierr);
889       j = j+1;
890       ierr = MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);CHKERRQ(ierr);
891     }
892     if (i>=m/3 && i<2*m/3) {
893       iblock = (i-m/3) / (user->mx*(user->mx-1));
894       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
895       ierr = MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);CHKERRQ(ierr);
896       j = j + user->mx;
897       ierr = MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);CHKERRQ(ierr);
898     }
899     if (i>=2*m/3) {
900       j = i-2*m/3;
901       ierr = MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);CHKERRQ(ierr);
902       j = j + user->mx*user->mx;
903       ierr = MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);CHKERRQ(ierr);
904     }
905   }
906 
907   ierr = MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
908   ierr = MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
909 
910   ierr = MatCreate(PETSC_COMM_WORLD,&user->L);CHKERRQ(ierr);
911   ierr = MatSetSizes(user->L,PETSC_DECIDE,ysubnlocal,m+n,n);CHKERRQ(ierr);
912   ierr = MatSetFromOptions(user->L);CHKERRQ(ierr);
913   ierr = MatMPIAIJSetPreallocation(user->L,2,NULL,2,NULL);CHKERRQ(ierr);
914   ierr = MatSeqAIJSetPreallocation(user->L,2,NULL);CHKERRQ(ierr);
915   ierr = MatGetOwnershipRange(user->L,&istart,&iend);CHKERRQ(ierr);
916 
917   for (i=istart; i<iend; i++) {
918     if (i<m/3) {
919       iblock = i / (user->mx-1);
920       j = iblock*user->mx + (i % (user->mx-1));
921       ierr = MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES);CHKERRQ(ierr);
922       j = j+1;
923       ierr = MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES);CHKERRQ(ierr);
924     }
925     if (i>=m/3 && i<2*m/3) {
926       iblock = (i-m/3) / (user->mx*(user->mx-1));
927       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
928       ierr = MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES);CHKERRQ(ierr);
929       j = j + user->mx;
930       ierr = MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES);CHKERRQ(ierr);
931     }
932     if (i>=2*m/3 && i<m) {
933       j = i-2*m/3;
934       ierr = MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES);CHKERRQ(ierr);
935       j = j + user->mx*user->mx;
936       ierr = MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES);CHKERRQ(ierr);
937     }
938     if (i>=m) {
939       j = i - m;
940       ierr = MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES);CHKERRQ(ierr);
941     }
942   }
943   ierr = MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
944   ierr = MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
945   ierr = MatScale(user->L,PetscPowScalar(h,1.5));CHKERRQ(ierr);
946 
947   /* Generate Div matrix */
948   if (!user->use_ptap) {
949     /* Generate Div matrix */
950     ierr = MatCreate(PETSC_COMM_WORLD,&user->Div);CHKERRQ(ierr);
951     ierr = MatSetSizes(user->Div,ysubnlocal,PETSC_DECIDE,n,m);CHKERRQ(ierr);
952     ierr = MatSetFromOptions(user->Div);CHKERRQ(ierr);
953     ierr = MatMPIAIJSetPreallocation(user->Div,4,NULL,4,NULL);CHKERRQ(ierr);
954     ierr = MatSeqAIJSetPreallocation(user->Div,6,NULL);CHKERRQ(ierr);
955     ierr = MatGetOwnershipRange(user->Grad,&istart,&iend);CHKERRQ(ierr);
956 
957     for (i=istart; i<iend; i++) {
958       if (i<m/3) {
959         iblock = i / (user->mx-1);
960         j = iblock*user->mx + (i % (user->mx-1));
961         ierr = MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES);CHKERRQ(ierr);
962         j = j+1;
963         ierr = MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES);CHKERRQ(ierr);
964       }
965       if (i>=m/3 && i<2*m/3) {
966         iblock = (i-m/3) / (user->mx*(user->mx-1));
967         j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
968         ierr = MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES);CHKERRQ(ierr);
969         j = j + user->mx;
970         ierr = MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES);CHKERRQ(ierr);
971       }
972       if (i>=2*m/3) {
973         j = i-2*m/3;
974         ierr = MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES);CHKERRQ(ierr);
975         j = j + user->mx*user->mx;
976         ierr = MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES);CHKERRQ(ierr);
977       }
978     }
979 
980     ierr = MatAssemblyBegin(user->Div,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981     ierr = MatAssemblyEnd(user->Div,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
982     ierr = MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork);CHKERRQ(ierr);
983   } else {
984     ierr = MatCreate(PETSC_COMM_WORLD,&user->Diag);CHKERRQ(ierr);
985     ierr = MatSetSizes(user->Diag,PETSC_DECIDE,PETSC_DECIDE,m,m);CHKERRQ(ierr);
986     ierr = MatSetFromOptions(user->Diag);CHKERRQ(ierr);
987     ierr = MatMPIAIJSetPreallocation(user->Diag,1,NULL,0,NULL);CHKERRQ(ierr);
988     ierr = MatSeqAIJSetPreallocation(user->Diag,1,NULL);CHKERRQ(ierr);
989   }
990 
991   /* Build work vectors and matrices */
992   ierr = VecCreate(PETSC_COMM_WORLD,&user->S);CHKERRQ(ierr);
993   ierr = VecSetSizes(user->S, PETSC_DECIDE, m);CHKERRQ(ierr);
994   ierr = VecSetFromOptions(user->S);CHKERRQ(ierr);
995 
996   ierr = VecCreate(PETSC_COMM_WORLD,&user->lwork);CHKERRQ(ierr);
997   ierr = VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx);CHKERRQ(ierr);
998   ierr = VecSetFromOptions(user->lwork);CHKERRQ(ierr);
999 
1000   ierr = MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork);CHKERRQ(ierr);
1001 
1002   ierr = VecDuplicate(user->S,&user->Swork);CHKERRQ(ierr);
1003   ierr = VecDuplicate(user->S,&user->Sdiag);CHKERRQ(ierr);
1004   ierr = VecDuplicate(user->S,&user->Av_u);CHKERRQ(ierr);
1005   ierr = VecDuplicate(user->S,&user->Twork);CHKERRQ(ierr);
1006   ierr = VecDuplicate(user->y,&user->ywork);CHKERRQ(ierr);
1007   ierr = VecDuplicate(user->u,&user->Ywork);CHKERRQ(ierr);
1008   ierr = VecDuplicate(user->u,&user->uwork);CHKERRQ(ierr);
1009   ierr = VecDuplicate(user->u,&user->js_diag);CHKERRQ(ierr);
1010   ierr = VecDuplicate(user->c,&user->cwork);CHKERRQ(ierr);
1011   ierr = VecDuplicate(user->d,&user->dwork);CHKERRQ(ierr);
1012 
1013   /* Create a matrix-free shell user->Jd for computing B*x */
1014   ierr = MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal,user->nstate,user->ndesign,user,&user->Jd);CHKERRQ(ierr);
1015   ierr = MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult);CHKERRQ(ierr);
1016   ierr = MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose);CHKERRQ(ierr);
1017 
1018   /* Compute true state function ytrue given utrue */
1019   ierr = VecDuplicate(user->y,&user->ytrue);CHKERRQ(ierr);
1020 
1021   /* First compute Av_u = Av*exp(-u) */
1022   ierr = VecSet(user->uwork, 0);CHKERRQ(ierr);
1023   ierr = VecAXPY(user->uwork,-1.0,user->utrue);CHKERRQ(ierr); /* Note: user->utrue */
1024   ierr = VecExp(user->uwork);CHKERRQ(ierr);
1025   ierr = MatMult(user->Av,user->uwork,user->Av_u);CHKERRQ(ierr);
1026 
1027   /* Next form DSG = Div*S*Grad */
1028   ierr = VecCopy(user->Av_u,user->Swork);CHKERRQ(ierr);
1029   ierr = VecReciprocal(user->Swork);CHKERRQ(ierr);
1030   if (user->use_ptap) {
1031     ierr = MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES);CHKERRQ(ierr);
1032     ierr = MatPtAP(user->Diag,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG);CHKERRQ(ierr);
1033   } else {
1034     ierr = MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1035     ierr = MatDiagonalScale(user->Divwork,NULL,user->Swork);CHKERRQ(ierr);
1036 
1037     ierr = MatMatMult(user->Divwork,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG);CHKERRQ(ierr);
1038   }
1039 
1040   ierr = MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1041   ierr = MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);CHKERRQ(ierr);
1042 
1043   if (user->use_lrc == PETSC_TRUE) {
1044     v=PetscSqrtReal(1.0 /user->ndesign);
1045     ierr = PetscMalloc1(user->ndesign,&user->ones);CHKERRQ(ierr);
1046 
1047     for (i=0;i<user->ndesign;i++) {
1048       user->ones[i]=v;
1049     }
1050     ierr = MatCreateDense(PETSC_COMM_WORLD,ysubnlocal,PETSC_DECIDE,user->ndesign,1,user->ones,&user->Ones);CHKERRQ(ierr);
1051     ierr = MatAssemblyBegin(user->Ones, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     ierr = MatAssemblyEnd(user->Ones, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     ierr = MatCreateLRC(user->DSG,user->Ones,NULL,user->Ones,&user->JsBlock);CHKERRQ(ierr);
1054     ierr = MatSetUp(user->JsBlock);CHKERRQ(ierr);
1055   } else {
1056     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
1057     ierr = MatCreateShell(PETSC_COMM_WORLD,ysubnlocal,ysubnlocal,user->ndesign,user->ndesign,user,&user->JsBlock);CHKERRQ(ierr);
1058     ierr = MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateBlockMatMult);CHKERRQ(ierr);
1059     ierr = MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateBlockMatMult);CHKERRQ(ierr);
1060   }
1061   ierr = MatSetOption(user->JsBlock,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1062   ierr = MatSetOption(user->JsBlock,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);CHKERRQ(ierr);
1063   ierr = MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->Js);CHKERRQ(ierr);
1064   ierr = MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult);CHKERRQ(ierr);
1065   ierr = MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMult);CHKERRQ(ierr);
1066   ierr = MatSetOption(user->Js,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1067   ierr = MatSetOption(user->Js,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);CHKERRQ(ierr);
1068 
1069   ierr = MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->JsInv);CHKERRQ(ierr);
1070   ierr = MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateInvMatMult);CHKERRQ(ierr);
1071   ierr = MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateInvMatMult);CHKERRQ(ierr);
1072   ierr = MatSetOption(user->JsInv,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1073   ierr = MatSetOption(user->JsInv,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);CHKERRQ(ierr);
1074 
1075   ierr = MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1076   ierr = MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);CHKERRQ(ierr);
1077   /* Now solve for ytrue */
1078   ierr = KSPCreate(PETSC_COMM_WORLD,&user->solver);CHKERRQ(ierr);
1079   ierr = KSPSetFromOptions(user->solver);CHKERRQ(ierr);
1080 
1081   ierr = KSPSetOperators(user->solver,user->JsBlock,user->DSG);CHKERRQ(ierr);
1082 
1083   ierr = MatMult(user->JsInv,user->q,user->ytrue);CHKERRQ(ierr);
1084   /* First compute Av_u = Av*exp(-u) */
1085   ierr = VecSet(user->uwork,0);CHKERRQ(ierr);
1086   ierr = VecAXPY(user->uwork,-1.0,user->u);CHKERRQ(ierr); /* Note: user->u */
1087   ierr = VecExp(user->uwork);CHKERRQ(ierr);
1088   ierr = MatMult(user->Av,user->uwork,user->Av_u);CHKERRQ(ierr);
1089 
1090   /* Next update DSG = Div*S*Grad  with user->u */
1091   ierr = VecCopy(user->Av_u,user->Swork);CHKERRQ(ierr);
1092   ierr = VecReciprocal(user->Swork);CHKERRQ(ierr);
1093   if (user->use_ptap) {
1094     ierr = MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES);CHKERRQ(ierr);
1095     ierr = MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG);CHKERRQ(ierr);
1096   } else {
1097     ierr = MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1098     ierr = MatDiagonalScale(user->Divwork,NULL,user->Av_u);CHKERRQ(ierr);
1099     ierr = MatProductNumeric(user->DSG);CHKERRQ(ierr);
1100   }
1101 
1102   /* Now solve for y */
1103 
1104   ierr = MatMult(user->JsInv,user->q,user->y);CHKERRQ(ierr);
1105 
1106   user->ksp_its_initial = user->ksp_its;
1107   user->ksp_its = 0;
1108   /* Construct projection matrix Q (blocks) */
1109   ierr = MatCreate(PETSC_COMM_WORLD,&user->Q);CHKERRQ(ierr);
1110   ierr = MatSetSizes(user->Q,dsubnlocal,ysubnlocal,user->ndata,user->ndesign);CHKERRQ(ierr);
1111   ierr = MatSetFromOptions(user->Q);CHKERRQ(ierr);
1112   ierr = MatMPIAIJSetPreallocation(user->Q,8,NULL,8,NULL);CHKERRQ(ierr);
1113   ierr = MatSeqAIJSetPreallocation(user->Q,8,NULL);CHKERRQ(ierr);
1114 
1115   for (i=0; i<user->mx; i++) {
1116     x[i] = h*(i+0.5);
1117     y[i] = h*(i+0.5);
1118     z[i] = h*(i+0.5);
1119   }
1120   ierr = MatGetOwnershipRange(user->Q,&istart,&iend);CHKERRQ(ierr);
1121 
1122   nx = user->mx; ny = user->mx; nz = user->mx;
1123   for (i=istart; i<iend; i++) {
1124 
1125     xri = xr[i];
1126     im = 0;
1127     xim = x[im];
1128     while (xri>xim && im<nx) {
1129       im = im+1;
1130       xim = x[im];
1131     }
1132     indx1 = im-1;
1133     indx2 = im;
1134     dx1 = xri - x[indx1];
1135     dx2 = x[indx2] - xri;
1136 
1137     yri = yr[i];
1138     im = 0;
1139     yim = y[im];
1140     while (yri>yim && im<ny) {
1141       im = im+1;
1142       yim = y[im];
1143     }
1144     indy1 = im-1;
1145     indy2 = im;
1146     dy1 = yri - y[indy1];
1147     dy2 = y[indy2] - yri;
1148 
1149     zri = zr[i];
1150     im = 0;
1151     zim = z[im];
1152     while (zri>zim && im<nz) {
1153       im = im+1;
1154       zim = z[im];
1155     }
1156     indz1 = im-1;
1157     indz2 = im;
1158     dz1 = zri - z[indz1];
1159     dz2 = z[indz2] - zri;
1160 
1161     Dx = x[indx2] - x[indx1];
1162     Dy = y[indy2] - y[indy1];
1163     Dz = z[indz2] - z[indz1];
1164 
1165     j = indx1 + indy1*nx + indz1*nx*ny;
1166     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1167     ierr = MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
1168 
1169     j = indx1 + indy1*nx + indz2*nx*ny;
1170     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1171     ierr = MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
1172 
1173     j = indx1 + indy2*nx + indz1*nx*ny;
1174     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1175     ierr = MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
1176 
1177     j = indx1 + indy2*nx + indz2*nx*ny;
1178     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1179     ierr = MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
1180 
1181     j = indx2 + indy1*nx + indz1*nx*ny;
1182     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1183     ierr = MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
1184 
1185     j = indx2 + indy1*nx + indz2*nx*ny;
1186     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1187     ierr = MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
1188 
1189     j = indx2 + indy2*nx + indz1*nx*ny;
1190     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1191     ierr = MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
1192 
1193     j = indx2 + indy2*nx + indz2*nx*ny;
1194     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1195     ierr = MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
1196   }
1197 
1198   ierr = MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1199   ierr = MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1200   /* Create MQ (composed of blocks of Q */
1201   ierr = MatCreateShell(PETSC_COMM_WORLD,dsubnlocal*user->ns,PETSC_DECIDE,user->ndata*user->ns,user->nstate,user,&user->MQ);CHKERRQ(ierr);
1202   ierr = MatShellSetOperation(user->MQ,MATOP_MULT,(void(*)(void))QMatMult);CHKERRQ(ierr);
1203   ierr = MatShellSetOperation(user->MQ,MATOP_MULT_TRANSPOSE,(void(*)(void))QMatMultTranspose);CHKERRQ(ierr);
1204 
1205   /* Add noise to the measurement data */
1206   ierr = VecSet(user->ywork,1.0);CHKERRQ(ierr);
1207   ierr = VecAYPX(user->ywork,user->noise,user->ytrue);CHKERRQ(ierr);
1208   ierr = MatMult(user->MQ,user->ywork,user->d);CHKERRQ(ierr);
1209 
1210   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1211   ierr = PetscFree(x);CHKERRQ(ierr);
1212   ierr = PetscFree(y);CHKERRQ(ierr);
1213   ierr = PetscFree(z);CHKERRQ(ierr);
1214   ierr = PetscLogStagePop();CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 PetscErrorCode EllipticDestroy(AppCtx *user)
1219 {
1220   PetscErrorCode ierr;
1221   PetscInt       i;
1222 
1223   PetscFunctionBegin;
1224   ierr = MatDestroy(&user->DSG);CHKERRQ(ierr);
1225   ierr = KSPDestroy(&user->solver);CHKERRQ(ierr);
1226   ierr = MatDestroy(&user->Q);CHKERRQ(ierr);
1227   ierr = MatDestroy(&user->MQ);CHKERRQ(ierr);
1228   if (!user->use_ptap) {
1229     ierr = MatDestroy(&user->Div);CHKERRQ(ierr);
1230     ierr = MatDestroy(&user->Divwork);CHKERRQ(ierr);
1231   } else {
1232     ierr = MatDestroy(&user->Diag);CHKERRQ(ierr);
1233   }
1234   if (user->use_lrc) {
1235     ierr = MatDestroy(&user->Ones);CHKERRQ(ierr);
1236   }
1237 
1238   ierr = MatDestroy(&user->Grad);CHKERRQ(ierr);
1239   ierr = MatDestroy(&user->Av);CHKERRQ(ierr);
1240   ierr = MatDestroy(&user->Avwork);CHKERRQ(ierr);
1241   ierr = MatDestroy(&user->L);CHKERRQ(ierr);
1242   ierr = MatDestroy(&user->Js);CHKERRQ(ierr);
1243   ierr = MatDestroy(&user->Jd);CHKERRQ(ierr);
1244   ierr = MatDestroy(&user->JsBlock);CHKERRQ(ierr);
1245   ierr = MatDestroy(&user->JsInv);CHKERRQ(ierr);
1246 
1247   ierr = VecDestroy(&user->x);CHKERRQ(ierr);
1248   ierr = VecDestroy(&user->u);CHKERRQ(ierr);
1249   ierr = VecDestroy(&user->uwork);CHKERRQ(ierr);
1250   ierr = VecDestroy(&user->utrue);CHKERRQ(ierr);
1251   ierr = VecDestroy(&user->y);CHKERRQ(ierr);
1252   ierr = VecDestroy(&user->ywork);CHKERRQ(ierr);
1253   ierr = VecDestroy(&user->ytrue);CHKERRQ(ierr);
1254   ierr = VecDestroy(&user->c);CHKERRQ(ierr);
1255   ierr = VecDestroy(&user->cwork);CHKERRQ(ierr);
1256   ierr = VecDestroy(&user->ur);CHKERRQ(ierr);
1257   ierr = VecDestroy(&user->q);CHKERRQ(ierr);
1258   ierr = VecDestroy(&user->d);CHKERRQ(ierr);
1259   ierr = VecDestroy(&user->dwork);CHKERRQ(ierr);
1260   ierr = VecDestroy(&user->lwork);CHKERRQ(ierr);
1261   ierr = VecDestroy(&user->S);CHKERRQ(ierr);
1262   ierr = VecDestroy(&user->Swork);CHKERRQ(ierr);
1263   ierr = VecDestroy(&user->Sdiag);CHKERRQ(ierr);
1264   ierr = VecDestroy(&user->Ywork);CHKERRQ(ierr);
1265   ierr = VecDestroy(&user->Twork);CHKERRQ(ierr);
1266   ierr = VecDestroy(&user->Av_u);CHKERRQ(ierr);
1267   ierr = VecDestroy(&user->js_diag);CHKERRQ(ierr);
1268   ierr = ISDestroy(&user->s_is);CHKERRQ(ierr);
1269   ierr = ISDestroy(&user->d_is);CHKERRQ(ierr);
1270   ierr = VecDestroy(&user->suby);CHKERRQ(ierr);
1271   ierr = VecDestroy(&user->subd);CHKERRQ(ierr);
1272   ierr = VecDestroy(&user->subq);CHKERRQ(ierr);
1273   ierr = VecScatterDestroy(&user->state_scatter);CHKERRQ(ierr);
1274   ierr = VecScatterDestroy(&user->design_scatter);CHKERRQ(ierr);
1275   for (i=0;i<user->ns;i++) {
1276     ierr = VecScatterDestroy(&user->yi_scatter[i]);CHKERRQ(ierr);
1277     ierr = VecScatterDestroy(&user->di_scatter[i]);CHKERRQ(ierr);
1278   }
1279   ierr = PetscFree(user->yi_scatter);CHKERRQ(ierr);
1280   ierr = PetscFree(user->di_scatter);CHKERRQ(ierr);
1281   if (user->use_lrc) {
1282     ierr = PetscFree(user->ones);CHKERRQ(ierr);
1283     ierr = MatDestroy(&user->Ones);CHKERRQ(ierr);
1284   }
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 PetscErrorCode EllipticMonitor(Tao tao, void *ptr)
1289 {
1290   PetscErrorCode ierr;
1291   Vec            X;
1292   PetscReal      unorm,ynorm;
1293   AppCtx         *user = (AppCtx*)ptr;
1294 
1295   PetscFunctionBegin;
1296   ierr = TaoGetSolutionVector(tao,&X);CHKERRQ(ierr);
1297   ierr = Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter);CHKERRQ(ierr);
1298   ierr = VecAXPY(user->ywork,-1.0,user->ytrue);CHKERRQ(ierr);
1299   ierr = VecAXPY(user->uwork,-1.0,user->utrue);CHKERRQ(ierr);
1300   ierr = VecNorm(user->uwork,NORM_2,&unorm);CHKERRQ(ierr);
1301   ierr = VecNorm(user->ywork,NORM_2,&ynorm);CHKERRQ(ierr);
1302   ierr = PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 /*TEST
1307 
1308    build:
1309       requires: !complex
1310 
1311    test:
1312       args: -tao_cmonitor -ns 1 -tao_type lcl -tao_gatol 1.e-3 -tao_max_it 11
1313       requires: !single
1314 
1315    test:
1316       suffix: 2
1317       args: -tao_cmonitor -tao_type lcl -tao_max_it 11 -use_ptap -use_lrc -ns 1 -tao_gatol 1.e-3
1318       requires: !single
1319 
1320 TEST*/
1321