xref: /petsc/src/tao/pde_constrained/tutorials/elliptic.c (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
1 #include <petsc/private/taoimpl.h>
2 
3 typedef struct {
4   PetscInt n; /* Number of total variables */
5   PetscInt m; /* Number of constraints */
6   PetscInt nstate;
7   PetscInt ndesign;
8   PetscInt mx; /* grid points in each direction */
9   PetscInt ns; /* Number of data samples (1<=ns<=8)
10                   Currently only ns=1 is supported */
11   PetscInt ndata; /* Number of data points per sample */
12   IS       s_is;
13   IS       d_is;
14 
15   VecScatter state_scatter;
16   VecScatter design_scatter;
17   VecScatter *yi_scatter, *di_scatter;
18   Vec        suby,subq,subd;
19   Mat        Js,Jd,JsPrec,JsInv,JsBlock;
20 
21   PetscReal alpha; /* Regularization parameter */
22   PetscReal beta; /* Weight attributed to ||u||^2 in regularization functional */
23   PetscReal noise; /* Amount of noise to add to data */
24   PetscReal *ones;
25   Mat       Q;
26   Mat       MQ;
27   Mat       L;
28 
29   Mat Grad;
30   Mat Av,Avwork;
31   Mat Div, Divwork;
32   Mat DSG;
33   Mat Diag,Ones;
34 
35   Vec q;
36   Vec ur; /* reference */
37 
38   Vec d;
39   Vec dwork;
40 
41   Vec x; /* super vec of y,u */
42 
43   Vec y; /* state variables */
44   Vec ywork;
45 
46   Vec ytrue;
47 
48   Vec u; /* design variables */
49   Vec uwork;
50 
51   Vec utrue;
52 
53   Vec js_diag;
54 
55   Vec c; /* constraint vector */
56   Vec cwork;
57 
58   Vec lwork;
59   Vec S;
60   Vec Swork,Twork,Sdiag,Ywork;
61   Vec Av_u;
62 
63   KSP solver;
64   PC  prec;
65 
66   PetscReal tola,tolb,tolc,told;
67   PetscInt  ksp_its;
68   PetscInt  ksp_its_initial;
69   PetscLogStage stages[10];
70   PetscBool use_ptap;
71   PetscBool use_lrc;
72 } AppCtx;
73 
74 PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
75 PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
76 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
77 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
78 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat,void*);
79 PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
80 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
81 PetscErrorCode Gather(Vec, Vec, VecScatter, Vec, VecScatter);
82 PetscErrorCode Scatter(Vec, Vec, VecScatter, Vec, VecScatter);
83 PetscErrorCode EllipticInitialize(AppCtx*);
84 PetscErrorCode EllipticDestroy(AppCtx*);
85 PetscErrorCode EllipticMonitor(Tao, void*);
86 
87 PetscErrorCode StateBlockMatMult(Mat,Vec,Vec);
88 PetscErrorCode StateMatMult(Mat,Vec,Vec);
89 
90 PetscErrorCode StateInvMatMult(Mat,Vec,Vec);
91 PetscErrorCode DesignMatMult(Mat,Vec,Vec);
92 PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);
93 
94 PetscErrorCode QMatMult(Mat,Vec,Vec);
95 PetscErrorCode QMatMultTranspose(Mat,Vec,Vec);
96 
97 static  char help[]="";
98 
99 int main(int argc, char **argv)
100 {
101   Vec                x0;
102   Tao                tao;
103   AppCtx             user;
104   PetscInt           ntests = 1;
105   PetscInt           i;
106 
107   PetscFunctionBeginUser;
108   PetscCall(PetscInitialize(&argc, &argv, (char*)0,help));
109   user.mx = 8;
110   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"elliptic example",NULL);
111   PetscCall(PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL));
112   user.ns = 6;
113   PetscCall(PetscOptionsInt("-ns","Number of data samples (1<=ns<=8)","",user.ns,&user.ns,NULL));
114   user.ndata = 64;
115   PetscCall(PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL));
116   user.alpha = 0.1;
117   PetscCall(PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL));
118   user.beta = 0.00001;
119   PetscCall(PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,NULL));
120   user.noise = 0.01;
121   PetscCall(PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,NULL));
122 
123   user.use_ptap = PETSC_FALSE;
124   PetscCall(PetscOptionsBool("-use_ptap","Use ptap matrix for DSG","",user.use_ptap,&user.use_ptap,NULL));
125   user.use_lrc = PETSC_FALSE;
126   PetscCall(PetscOptionsBool("-use_lrc","Use lrc matrix for Js","",user.use_lrc,&user.use_lrc,NULL));
127   PetscCall(PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL));
128   PetscOptionsEnd();
129 
130   user.m = user.ns*user.mx*user.mx*user.mx; /* number of constraints */
131   user.nstate =  user.m;
132   user.ndesign = user.mx*user.mx*user.mx;
133   user.n = user.nstate + user.ndesign; /* number of variables */
134 
135   /* Create TAO solver and set desired solution method */
136   PetscCall(TaoCreate(PETSC_COMM_WORLD,&tao));
137   PetscCall(TaoSetType(tao,TAOLCL));
138 
139   /* Set up initial vectors and matrices */
140   PetscCall(EllipticInitialize(&user));
141 
142   PetscCall(Gather(user.x,user.y,user.state_scatter,user.u,user.design_scatter));
143   PetscCall(VecDuplicate(user.x,&x0));
144   PetscCall(VecCopy(user.x,x0));
145 
146   /* Set solution vector with an initial guess */
147   PetscCall(TaoSetSolution(tao,user.x));
148   PetscCall(TaoSetObjective(tao, FormFunction, &user));
149   PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user));
150   PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user));
151 
152   PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, NULL, user.JsInv, FormJacobianState, &user));
153   PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user));
154 
155   PetscCall(TaoSetStateDesignIS(tao,user.s_is,user.d_is));
156   PetscCall(TaoSetFromOptions(tao));
157 
158   /* SOLVE THE APPLICATION */
159   PetscCall(PetscLogStageRegister("Trials",&user.stages[1]));
160   PetscCall(PetscLogStagePush(user.stages[1]));
161   for (i=0; i<ntests; i++) {
162     PetscCall(TaoSolve(tao));
163     PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %" PetscInt_FMT "\n",user.ksp_its));
164     PetscCall(VecCopy(x0,user.x));
165   }
166   PetscCall(PetscLogStagePop());
167   PetscCall(PetscBarrier((PetscObject)user.x));
168   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: "));
169   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%" PetscInt_FMT "\n",user.ksp_its_initial));
170 
171   PetscCall(TaoDestroy(&tao));
172   PetscCall(VecDestroy(&x0));
173   PetscCall(EllipticDestroy(&user));
174   PetscCall(PetscFinalize());
175   return 0;
176 }
177 /* ------------------------------------------------------------------- */
178 /*
179    dwork = Qy - d
180    lwork = L*(u-ur)
181    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
182 */
183 PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
184 {
185   PetscReal      d1=0,d2=0;
186   AppCtx         *user = (AppCtx*)ptr;
187 
188   PetscFunctionBegin;
189   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
190   PetscCall(MatMult(user->MQ,user->y,user->dwork));
191   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
192   PetscCall(VecDot(user->dwork,user->dwork,&d1));
193   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
194   PetscCall(MatMult(user->L,user->uwork,user->lwork));
195   PetscCall(VecDot(user->lwork,user->lwork,&d2));
196   *f = 0.5 * (d1 + user->alpha*d2);
197   PetscFunctionReturn(0);
198 }
199 
200 /* ------------------------------------------------------------------- */
201 /*
202     state: g_s = Q' *(Qy - d)
203     design: g_d = alpha*L'*L*(u-ur)
204 */
205 PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
206 {
207   AppCtx         *user = (AppCtx*)ptr;
208 
209   PetscFunctionBegin;
210   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
211   PetscCall(MatMult(user->MQ,user->y,user->dwork));
212   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
213   PetscCall(MatMultTranspose(user->MQ,user->dwork,user->ywork));
214   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
215   PetscCall(MatMult(user->L,user->uwork,user->lwork));
216   PetscCall(MatMultTranspose(user->L,user->lwork,user->uwork));
217   PetscCall(VecScale(user->uwork, user->alpha));
218   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
219   PetscFunctionReturn(0);
220 }
221 
222 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
223 {
224   PetscReal      d1,d2;
225   AppCtx         *user = (AppCtx*)ptr;
226 
227   PetscFunctionBegin;
228   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
229   PetscCall(MatMult(user->MQ,user->y,user->dwork));
230   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
231   PetscCall(VecDot(user->dwork,user->dwork,&d1));
232   PetscCall(MatMultTranspose(user->MQ,user->dwork,user->ywork));
233 
234   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
235   PetscCall(MatMult(user->L,user->uwork,user->lwork));
236   PetscCall(VecDot(user->lwork,user->lwork,&d2));
237   PetscCall(MatMultTranspose(user->L,user->lwork,user->uwork));
238   PetscCall(VecScale(user->uwork, user->alpha));
239   *f = 0.5 * (d1 + user->alpha*d2);
240   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
241   PetscFunctionReturn(0);
242 }
243 
244 /* ------------------------------------------------------------------- */
245 /* A
246 MatShell object
247 */
248 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
249 {
250   AppCtx         *user = (AppCtx*)ptr;
251 
252   PetscFunctionBegin;
253   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
254   /* DSG = Div * (1/Av_u) * Grad */
255   PetscCall(VecSet(user->uwork,0));
256   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
257   PetscCall(VecExp(user->uwork));
258   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
259   PetscCall(VecCopy(user->Av_u,user->Swork));
260   PetscCall(VecReciprocal(user->Swork));
261   if (user->use_ptap) {
262     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
263     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG));
264   } else {
265     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
266     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork));
267     PetscCall(MatProductNumeric(user->DSG));
268   }
269   PetscFunctionReturn(0);
270 }
271 /* ------------------------------------------------------------------- */
272 /* B */
273 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
274 {
275   AppCtx         *user = (AppCtx*)ptr;
276 
277   PetscFunctionBegin;
278   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
279   PetscFunctionReturn(0);
280 }
281 
282 PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y)
283 {
284   PetscReal      sum;
285   AppCtx         *user;
286 
287   PetscFunctionBegin;
288   PetscCall(MatShellGetContext(J_shell,&user));
289   PetscCall(MatMult(user->DSG,X,Y));
290   PetscCall(VecSum(X,&sum));
291   sum /= user->ndesign;
292   PetscCall(VecShift(Y,sum));
293   PetscFunctionReturn(0);
294 }
295 
296 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
297 {
298   PetscInt       i;
299   AppCtx         *user;
300 
301   PetscFunctionBegin;
302   PetscCall(MatShellGetContext(J_shell,&user));
303   if (user->ns == 1) {
304     PetscCall(MatMult(user->JsBlock,X,Y));
305   } else {
306     for (i=0;i<user->ns;i++) {
307       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
308       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
309       PetscCall(MatMult(user->JsBlock,user->subq,user->suby));
310       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
311     }
312   }
313   PetscFunctionReturn(0);
314 }
315 
316 PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y)
317 {
318   PetscInt       its,i;
319   AppCtx         *user;
320 
321   PetscFunctionBegin;
322   PetscCall(MatShellGetContext(J_shell,&user));
323   PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->DSG));
324   if (Y == user->ytrue) {
325     /* First solve is done using true solution to set up problem */
326     PetscCall(KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
327   } else {
328     PetscCall(KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
329   }
330   if (user->ns == 1) {
331     PetscCall(KSPSolve(user->solver,X,Y));
332     PetscCall(KSPGetIterationNumber(user->solver,&its));
333     user->ksp_its+=its;
334   } else {
335     for (i=0;i<user->ns;i++) {
336       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
337       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
338       PetscCall(KSPSolve(user->solver,user->subq,user->suby));
339       PetscCall(KSPGetIterationNumber(user->solver,&its));
340       user->ksp_its+=its;
341       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
342     }
343   }
344   PetscFunctionReturn(0);
345 }
346 PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y)
347 {
348   AppCtx         *user;
349   PetscInt       i;
350 
351   PetscFunctionBegin;
352   PetscCall(MatShellGetContext(J_shell,&user));
353   if (user->ns == 1) {
354     PetscCall(MatMult(user->Q,X,Y));
355   } else {
356     for (i=0;i<user->ns;i++) {
357       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
358       PetscCall(Scatter(Y,user->subd,user->di_scatter[i],0,0));
359       PetscCall(MatMult(user->Q,user->subq,user->subd));
360       PetscCall(Gather(Y,user->subd,user->di_scatter[i],0,0));
361     }
362   }
363   PetscFunctionReturn(0);
364 }
365 
366 PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y)
367 {
368   AppCtx         *user;
369   PetscInt       i;
370 
371   PetscFunctionBegin;
372   PetscCall(MatShellGetContext(J_shell,&user));
373   if (user->ns == 1) {
374     PetscCall(MatMultTranspose(user->Q,X,Y));
375   } else {
376     for (i=0;i<user->ns;i++) {
377       PetscCall(Scatter(X,user->subd,user->di_scatter[i],0,0));
378       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
379       PetscCall(MatMultTranspose(user->Q,user->subd,user->suby));
380       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
381     }
382   }
383   PetscFunctionReturn(0);
384 }
385 
386 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
387 {
388   PetscInt       i;
389   AppCtx         *user;
390 
391   PetscFunctionBegin;
392   PetscCall(MatShellGetContext(J_shell,&user));
393 
394   /* sdiag(1./v) */
395   PetscCall(VecSet(user->uwork,0));
396   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
397   PetscCall(VecExp(user->uwork));
398 
399   /* sdiag(1./((Av*(1./v)).^2)) */
400   PetscCall(MatMult(user->Av,user->uwork,user->Swork));
401   PetscCall(VecPointwiseMult(user->Swork,user->Swork,user->Swork));
402   PetscCall(VecReciprocal(user->Swork));
403 
404   /* (Av * (sdiag(1./v) * b)) */
405   PetscCall(VecPointwiseMult(user->uwork,user->uwork,X));
406   PetscCall(MatMult(user->Av,user->uwork,user->Twork));
407 
408   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
409   PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork));
410 
411   if (user->ns == 1) {
412     /* (sdiag(Grad*y(:,i)) */
413     PetscCall(MatMult(user->Grad,user->y,user->Twork));
414 
415     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
416     PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork));
417     PetscCall(MatMultTranspose(user->Grad,user->Swork,Y));
418   } else {
419     for (i=0;i<user->ns;i++) {
420       PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
421       PetscCall(Scatter(Y,user->subq,user->yi_scatter[i],0,0));
422 
423       PetscCall(MatMult(user->Grad,user->suby,user->Twork));
424       PetscCall(VecPointwiseMult(user->Twork,user->Twork,user->Swork));
425       PetscCall(MatMultTranspose(user->Grad,user->Twork,user->subq));
426       PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
427       PetscCall(Gather(Y,user->subq,user->yi_scatter[i],0,0));
428     }
429   }
430   PetscFunctionReturn(0);
431 }
432 
433 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
434 {
435   PetscInt       i;
436   AppCtx         *user;
437 
438   PetscFunctionBegin;
439   PetscCall(MatShellGetContext(J_shell,&user));
440   PetscCall(VecZeroEntries(Y));
441 
442   /* Sdiag = 1./((Av*(1./v)).^2) */
443   PetscCall(VecSet(user->uwork,0));
444   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
445   PetscCall(VecExp(user->uwork));
446   PetscCall(MatMult(user->Av,user->uwork,user->Swork));
447   PetscCall(VecPointwiseMult(user->Sdiag,user->Swork,user->Swork));
448   PetscCall(VecReciprocal(user->Sdiag));
449 
450   for (i=0;i<user->ns;i++) {
451     PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
452     PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
453 
454     /* Swork = (Div' * b(:,i)) */
455     PetscCall(MatMult(user->Grad,user->subq,user->Swork));
456 
457     /* Twork = Grad*y(:,i) */
458     PetscCall(MatMult(user->Grad,user->suby,user->Twork));
459 
460     /* Twork = sdiag(Twork) * Swork */
461     PetscCall(VecPointwiseMult(user->Twork,user->Swork,user->Twork));
462 
463     /* Swork = pointwisemult(Sdiag,Twork) */
464     PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Sdiag));
465 
466     /* Ywork = Av' * Swork */
467     PetscCall(MatMultTranspose(user->Av,user->Swork,user->Ywork));
468 
469     /* Ywork = pointwisemult(uwork,Ywork) */
470     PetscCall(VecPointwiseMult(user->Ywork,user->uwork,user->Ywork));
471     PetscCall(VecAXPY(Y,1.0,user->Ywork));
472     PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
478 {
479    /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
480    PetscReal      sum;
481    PetscInt       i;
482    AppCtx         *user = (AppCtx*)ptr;
483 
484    PetscFunctionBegin;
485    PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
486    if (user->ns == 1) {
487      PetscCall(MatMult(user->Grad,user->y,user->Swork));
488      PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u));
489      PetscCall(MatMultTranspose(user->Grad,user->Swork,C));
490      PetscCall(VecSum(user->y,&sum));
491      sum /= user->ndesign;
492      PetscCall(VecShift(C,sum));
493    } else {
494      for (i=0;i<user->ns;i++) {
495       PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
496       PetscCall(Scatter(C,user->subq,user->yi_scatter[i],0,0));
497       PetscCall(MatMult(user->Grad,user->suby,user->Swork));
498       PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u));
499       PetscCall(MatMultTranspose(user->Grad,user->Swork,user->subq));
500 
501       PetscCall(VecSum(user->suby,&sum));
502       sum /= user->ndesign;
503       PetscCall(VecShift(user->subq,sum));
504 
505       PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
506       PetscCall(Gather(C,user->subq,user->yi_scatter[i],0,0));
507      }
508    }
509    PetscCall(VecAXPY(C,-1.0,user->q));
510    PetscFunctionReturn(0);
511 }
512 
513 PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
514 {
515   PetscFunctionBegin;
516   PetscCall(VecScatterBegin(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD));
517   PetscCall(VecScatterEnd(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD));
518   if (sub2) {
519     PetscCall(VecScatterBegin(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD));
520     PetscCall(VecScatterEnd(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD));
521   }
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
526 {
527   PetscFunctionBegin;
528   PetscCall(VecScatterBegin(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE));
529   PetscCall(VecScatterEnd(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE));
530   if (sub2) {
531     PetscCall(VecScatterBegin(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE));
532     PetscCall(VecScatterEnd(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE));
533   }
534   PetscFunctionReturn(0);
535 }
536 
537 PetscErrorCode EllipticInitialize(AppCtx *user)
538 {
539   PetscInt       m,n,i,j,k,l,linear_index,is,js,ks,ls,istart,iend,iblock;
540   Vec            XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork;
541   PetscReal      *x,*y,*z;
542   PetscReal      h,meanut;
543   PetscScalar    hinv,neg_hinv,half = 0.5,sqrt_beta;
544   PetscInt       im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
545   IS             is_alldesign,is_allstate;
546   IS             is_from_d;
547   IS             is_from_y;
548   PetscInt       lo,hi,hi2,lo2,ysubnlocal,dsubnlocal;
549   const PetscInt *ranges, *subranges;
550   PetscMPIInt    size;
551   PetscReal      xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
552   PetscScalar    v,vx,vy,vz;
553   PetscInt       offset,subindex,subvec,nrank,kk;
554 
555   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,
556                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,
557                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,
558                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,
559                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,
560                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,
561                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,
562                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};
563 
564   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,
565                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,
566                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,
567                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,
568                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,
569                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,
570                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,
571                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};
572 
573   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,
574                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,
575                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,
576                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,
577                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,
578                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,
579                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,
580                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};
581 
582   PetscFunctionBegin;
583   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size));
584   PetscCall(PetscLogStageRegister("Elliptic Setup",&user->stages[0]));
585   PetscCall(PetscLogStagePush(user->stages[0]));
586 
587   /* Create u,y,c,x */
588   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->u));
589   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->y));
590   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->c));
591   PetscCall(VecSetSizes(user->u,PETSC_DECIDE,user->ndesign));
592   PetscCall(VecSetFromOptions(user->u));
593   PetscCall(VecGetLocalSize(user->u,&ysubnlocal));
594   PetscCall(VecSetSizes(user->y,ysubnlocal*user->ns,user->nstate));
595   PetscCall(VecSetSizes(user->c,ysubnlocal*user->ns,user->m));
596   PetscCall(VecSetFromOptions(user->y));
597   PetscCall(VecSetFromOptions(user->c));
598 
599   /*
600      *******************************
601      Create scatters for x <-> y,u
602      *******************************
603 
604      If the state vector y and design vector u are partitioned as
605      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
606      then the solution vector x is organized as
607      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
608      The index sets user->s_is and user->d_is correspond to the indices of the
609      state and design variables owned by the current processor.
610   */
611   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->x));
612 
613   PetscCall(VecGetOwnershipRange(user->y,&lo,&hi));
614   PetscCall(VecGetOwnershipRange(user->u,&lo2,&hi2));
615 
616   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate));
617   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user->s_is));
618   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign));
619   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user->d_is));
620 
621   PetscCall(VecSetSizes(user->x,hi-lo+hi2-lo2,user->n));
622   PetscCall(VecSetFromOptions(user->x));
623 
624   PetscCall(VecScatterCreate(user->x,user->s_is,user->y,is_allstate,&user->state_scatter));
625   PetscCall(VecScatterCreate(user->x,user->d_is,user->u,is_alldesign,&user->design_scatter));
626   PetscCall(ISDestroy(&is_alldesign));
627   PetscCall(ISDestroy(&is_allstate));
628   /*
629      *******************************
630      Create scatter from y to y_1,y_2,...,y_ns
631      *******************************
632   */
633   PetscCall(PetscMalloc1(user->ns,&user->yi_scatter));
634   PetscCall(VecDuplicate(user->u,&user->suby));
635   PetscCall(VecDuplicate(user->u,&user->subq));
636 
637   PetscCall(VecGetOwnershipRange(user->y,&lo2,&hi2));
638   istart = 0;
639   for (i=0; i<user->ns; i++) {
640     PetscCall(VecGetOwnershipRange(user->suby,&lo,&hi));
641     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y));
642     PetscCall(VecScatterCreate(user->y,is_from_y,user->suby,NULL,&user->yi_scatter[i]));
643     istart = istart + hi-lo;
644     PetscCall(ISDestroy(&is_from_y));
645   }
646   /*
647      *******************************
648      Create scatter from d to d_1,d_2,...,d_ns
649      *******************************
650   */
651   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->subd));
652   PetscCall(VecSetSizes(user->subd,PETSC_DECIDE,user->ndata));
653   PetscCall(VecSetFromOptions(user->subd));
654   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->d));
655   PetscCall(VecGetLocalSize(user->subd,&dsubnlocal));
656   PetscCall(VecSetSizes(user->d,dsubnlocal*user->ns,user->ndata*user->ns));
657   PetscCall(VecSetFromOptions(user->d));
658   PetscCall(PetscMalloc1(user->ns,&user->di_scatter));
659 
660   PetscCall(VecGetOwnershipRange(user->d,&lo2,&hi2));
661   istart = 0;
662   for (i=0; i<user->ns; i++) {
663     PetscCall(VecGetOwnershipRange(user->subd,&lo,&hi));
664     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d));
665     PetscCall(VecScatterCreate(user->d,is_from_d,user->subd,NULL,&user->di_scatter[i]));
666     istart = istart + hi-lo;
667     PetscCall(ISDestroy(&is_from_d));
668   }
669 
670   PetscCall(PetscMalloc1(user->mx,&x));
671   PetscCall(PetscMalloc1(user->mx,&y));
672   PetscCall(PetscMalloc1(user->mx,&z));
673 
674   user->ksp_its = 0;
675   user->ksp_its_initial = 0;
676 
677   n = user->mx * user->mx * user->mx;
678   m = 3 * user->mx * user->mx * (user->mx-1);
679   sqrt_beta = PetscSqrtScalar(user->beta);
680 
681   PetscCall(VecCreate(PETSC_COMM_WORLD,&XX));
682   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->q));
683   PetscCall(VecSetSizes(XX,ysubnlocal,n));
684   PetscCall(VecSetSizes(user->q,ysubnlocal*user->ns,user->m));
685   PetscCall(VecSetFromOptions(XX));
686   PetscCall(VecSetFromOptions(user->q));
687 
688   PetscCall(VecDuplicate(XX,&YY));
689   PetscCall(VecDuplicate(XX,&ZZ));
690   PetscCall(VecDuplicate(XX,&XXwork));
691   PetscCall(VecDuplicate(XX,&YYwork));
692   PetscCall(VecDuplicate(XX,&ZZwork));
693   PetscCall(VecDuplicate(XX,&UTwork));
694   PetscCall(VecDuplicate(XX,&user->utrue));
695 
696   /* map for striding q */
697   PetscCall(VecGetOwnershipRanges(user->q,&ranges));
698   PetscCall(VecGetOwnershipRanges(user->u,&subranges));
699 
700   PetscCall(VecGetOwnershipRange(user->q,&lo2,&hi2));
701   PetscCall(VecGetOwnershipRange(user->u,&lo,&hi));
702   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
703   h = 1.0/user->mx;
704   hinv = user->mx;
705   neg_hinv = -hinv;
706 
707   PetscCall(VecGetOwnershipRange(XX,&istart,&iend));
708   for (linear_index=istart; linear_index<iend; linear_index++) {
709     i = linear_index % user->mx;
710     j = ((linear_index-i)/user->mx) % user->mx;
711     k = ((linear_index-i)/user->mx-j) / user->mx;
712     vx = h*(i+0.5);
713     vy = h*(j+0.5);
714     vz = h*(k+0.5);
715     PetscCall(VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES));
716     PetscCall(VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES));
717     PetscCall(VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES));
718     for (is=0; is<2; is++) {
719       for (js=0; js<2; js++) {
720         for (ks=0; ks<2; ks++) {
721           ls = is*4 + js*2 + ks;
722           if (ls<user->ns) {
723             l =ls*n + linear_index;
724             /* remap */
725             subindex = l%n;
726             subvec = l/n;
727             nrank=0;
728             while (subindex >= subranges[nrank+1]) nrank++;
729             offset = subindex - subranges[nrank];
730             istart=0;
731             for (kk=0;kk<nrank;kk++) istart+=user->ns*(subranges[kk+1]-subranges[kk]);
732             istart += (subranges[nrank+1]-subranges[nrank])*subvec;
733             l = istart+offset;
734             v = 100*PetscSinScalar(2*PETSC_PI*(vx+0.25*is))*PetscSinScalar(2*PETSC_PI*(vy+0.25*js))*PetscSinScalar(2*PETSC_PI*(vz+0.25*ks));
735             PetscCall(VecSetValues(user->q,1,&l,&v,INSERT_VALUES));
736           }
737         }
738       }
739     }
740   }
741 
742   PetscCall(VecAssemblyBegin(XX));
743   PetscCall(VecAssemblyEnd(XX));
744   PetscCall(VecAssemblyBegin(YY));
745   PetscCall(VecAssemblyEnd(YY));
746   PetscCall(VecAssemblyBegin(ZZ));
747   PetscCall(VecAssemblyEnd(ZZ));
748   PetscCall(VecAssemblyBegin(user->q));
749   PetscCall(VecAssemblyEnd(user->q));
750 
751   /* Compute true parameter function
752      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
753   PetscCall(VecCopy(XX,XXwork));
754   PetscCall(VecCopy(YY,YYwork));
755   PetscCall(VecCopy(ZZ,ZZwork));
756 
757   PetscCall(VecShift(XXwork,-0.25));
758   PetscCall(VecShift(YYwork,-0.25));
759   PetscCall(VecShift(ZZwork,-0.25));
760 
761   PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork));
762   PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork));
763   PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork));
764 
765   PetscCall(VecCopy(XXwork,UTwork));
766   PetscCall(VecAXPY(UTwork,1.0,YYwork));
767   PetscCall(VecAXPY(UTwork,1.0,ZZwork));
768   PetscCall(VecScale(UTwork,-20.0));
769   PetscCall(VecExp(UTwork));
770   PetscCall(VecCopy(UTwork,user->utrue));
771 
772   PetscCall(VecCopy(XX,XXwork));
773   PetscCall(VecCopy(YY,YYwork));
774   PetscCall(VecCopy(ZZ,ZZwork));
775 
776   PetscCall(VecShift(XXwork,-0.75));
777   PetscCall(VecShift(YYwork,-0.75));
778   PetscCall(VecShift(ZZwork,-0.75));
779 
780   PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork));
781   PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork));
782   PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork));
783 
784   PetscCall(VecCopy(XXwork,UTwork));
785   PetscCall(VecAXPY(UTwork,1.0,YYwork));
786   PetscCall(VecAXPY(UTwork,1.0,ZZwork));
787   PetscCall(VecScale(UTwork,-20.0));
788   PetscCall(VecExp(UTwork));
789 
790   PetscCall(VecAXPY(user->utrue,-1.0,UTwork));
791 
792   PetscCall(VecDestroy(&XX));
793   PetscCall(VecDestroy(&YY));
794   PetscCall(VecDestroy(&ZZ));
795   PetscCall(VecDestroy(&XXwork));
796   PetscCall(VecDestroy(&YYwork));
797   PetscCall(VecDestroy(&ZZwork));
798   PetscCall(VecDestroy(&UTwork));
799 
800   /* Initial guess and reference model */
801   PetscCall(VecDuplicate(user->utrue,&user->ur));
802   PetscCall(VecSum(user->utrue,&meanut));
803   meanut = meanut / n;
804   PetscCall(VecSet(user->ur,meanut));
805   PetscCall(VecCopy(user->ur,user->u));
806 
807   /* Generate Grad matrix */
808   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Grad));
809   PetscCall(MatSetSizes(user->Grad,PETSC_DECIDE,ysubnlocal,m,n));
810   PetscCall(MatSetFromOptions(user->Grad));
811   PetscCall(MatMPIAIJSetPreallocation(user->Grad,2,NULL,2,NULL));
812   PetscCall(MatSeqAIJSetPreallocation(user->Grad,2,NULL));
813   PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend));
814 
815   for (i=istart; i<iend; i++) {
816     if (i<m/3) {
817       iblock = i / (user->mx-1);
818       j = iblock*user->mx + (i % (user->mx-1));
819       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
820       j = j+1;
821       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
822     }
823     if (i>=m/3 && i<2*m/3) {
824       iblock = (i-m/3) / (user->mx*(user->mx-1));
825       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
826       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
827       j = j + user->mx;
828       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
829     }
830     if (i>=2*m/3) {
831       j = i-2*m/3;
832       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
833       j = j + user->mx*user->mx;
834       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
835     }
836   }
837 
838   PetscCall(MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY));
839   PetscCall(MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY));
840 
841   /* Generate arithmetic averaging matrix Av */
842   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Av));
843   PetscCall(MatSetSizes(user->Av,PETSC_DECIDE,ysubnlocal,m,n));
844   PetscCall(MatSetFromOptions(user->Av));
845   PetscCall(MatMPIAIJSetPreallocation(user->Av,2,NULL,2,NULL));
846   PetscCall(MatSeqAIJSetPreallocation(user->Av,2,NULL));
847   PetscCall(MatGetOwnershipRange(user->Av,&istart,&iend));
848 
849   for (i=istart; i<iend; i++) {
850     if (i<m/3) {
851       iblock = i / (user->mx-1);
852       j = iblock*user->mx + (i % (user->mx-1));
853       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
854       j = j+1;
855       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
856     }
857     if (i>=m/3 && i<2*m/3) {
858       iblock = (i-m/3) / (user->mx*(user->mx-1));
859       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
860       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
861       j = j + user->mx;
862       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
863     }
864     if (i>=2*m/3) {
865       j = i-2*m/3;
866       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
867       j = j + user->mx*user->mx;
868       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
869     }
870   }
871 
872   PetscCall(MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY));
873   PetscCall(MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY));
874 
875   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->L));
876   PetscCall(MatSetSizes(user->L,PETSC_DECIDE,ysubnlocal,m+n,n));
877   PetscCall(MatSetFromOptions(user->L));
878   PetscCall(MatMPIAIJSetPreallocation(user->L,2,NULL,2,NULL));
879   PetscCall(MatSeqAIJSetPreallocation(user->L,2,NULL));
880   PetscCall(MatGetOwnershipRange(user->L,&istart,&iend));
881 
882   for (i=istart; i<iend; i++) {
883     if (i<m/3) {
884       iblock = i / (user->mx-1);
885       j = iblock*user->mx + (i % (user->mx-1));
886       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
887       j = j+1;
888       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
889     }
890     if (i>=m/3 && i<2*m/3) {
891       iblock = (i-m/3) / (user->mx*(user->mx-1));
892       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
893       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
894       j = j + user->mx;
895       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
896     }
897     if (i>=2*m/3 && i<m) {
898       j = i-2*m/3;
899       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
900       j = j + user->mx*user->mx;
901       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
902     }
903     if (i>=m) {
904       j = i - m;
905       PetscCall(MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES));
906     }
907   }
908   PetscCall(MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY));
909   PetscCall(MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY));
910   PetscCall(MatScale(user->L,PetscPowScalar(h,1.5)));
911 
912   /* Generate Div matrix */
913   if (!user->use_ptap) {
914     /* Generate Div matrix */
915     PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Div));
916     PetscCall(MatSetSizes(user->Div,ysubnlocal,PETSC_DECIDE,n,m));
917     PetscCall(MatSetFromOptions(user->Div));
918     PetscCall(MatMPIAIJSetPreallocation(user->Div,4,NULL,4,NULL));
919     PetscCall(MatSeqAIJSetPreallocation(user->Div,6,NULL));
920     PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend));
921 
922     for (i=istart; i<iend; i++) {
923       if (i<m/3) {
924         iblock = i / (user->mx-1);
925         j = iblock*user->mx + (i % (user->mx-1));
926         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
927         j = j+1;
928         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
929       }
930       if (i>=m/3 && i<2*m/3) {
931         iblock = (i-m/3) / (user->mx*(user->mx-1));
932         j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
933         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
934         j = j + user->mx;
935         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
936       }
937       if (i>=2*m/3) {
938         j = i-2*m/3;
939         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
940         j = j + user->mx*user->mx;
941         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
942       }
943     }
944 
945     PetscCall(MatAssemblyBegin(user->Div,MAT_FINAL_ASSEMBLY));
946     PetscCall(MatAssemblyEnd(user->Div,MAT_FINAL_ASSEMBLY));
947     PetscCall(MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork));
948   } else {
949     PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Diag));
950     PetscCall(MatSetSizes(user->Diag,PETSC_DECIDE,PETSC_DECIDE,m,m));
951     PetscCall(MatSetFromOptions(user->Diag));
952     PetscCall(MatMPIAIJSetPreallocation(user->Diag,1,NULL,0,NULL));
953     PetscCall(MatSeqAIJSetPreallocation(user->Diag,1,NULL));
954   }
955 
956   /* Build work vectors and matrices */
957   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->S));
958   PetscCall(VecSetSizes(user->S, PETSC_DECIDE, m));
959   PetscCall(VecSetFromOptions(user->S));
960 
961   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->lwork));
962   PetscCall(VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx));
963   PetscCall(VecSetFromOptions(user->lwork));
964 
965   PetscCall(MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork));
966 
967   PetscCall(VecDuplicate(user->S,&user->Swork));
968   PetscCall(VecDuplicate(user->S,&user->Sdiag));
969   PetscCall(VecDuplicate(user->S,&user->Av_u));
970   PetscCall(VecDuplicate(user->S,&user->Twork));
971   PetscCall(VecDuplicate(user->y,&user->ywork));
972   PetscCall(VecDuplicate(user->u,&user->Ywork));
973   PetscCall(VecDuplicate(user->u,&user->uwork));
974   PetscCall(VecDuplicate(user->u,&user->js_diag));
975   PetscCall(VecDuplicate(user->c,&user->cwork));
976   PetscCall(VecDuplicate(user->d,&user->dwork));
977 
978   /* Create a matrix-free shell user->Jd for computing B*x */
979   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal,user->nstate,user->ndesign,user,&user->Jd));
980   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult));
981   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose));
982 
983   /* Compute true state function ytrue given utrue */
984   PetscCall(VecDuplicate(user->y,&user->ytrue));
985 
986   /* First compute Av_u = Av*exp(-u) */
987   PetscCall(VecSet(user->uwork, 0));
988   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue)); /* Note: user->utrue */
989   PetscCall(VecExp(user->uwork));
990   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
991 
992   /* Next form DSG = Div*S*Grad */
993   PetscCall(VecCopy(user->Av_u,user->Swork));
994   PetscCall(VecReciprocal(user->Swork));
995   if (user->use_ptap) {
996     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
997     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG));
998   } else {
999     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1000     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork));
1001 
1002     PetscCall(MatMatMult(user->Divwork,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG));
1003   }
1004 
1005   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE));
1006   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1007 
1008   if (user->use_lrc == PETSC_TRUE) {
1009     v=PetscSqrtReal(1.0 /user->ndesign);
1010     PetscCall(PetscMalloc1(user->ndesign,&user->ones));
1011 
1012     for (i=0;i<user->ndesign;i++) {
1013       user->ones[i]=v;
1014     }
1015     PetscCall(MatCreateDense(PETSC_COMM_WORLD,ysubnlocal,PETSC_DECIDE,user->ndesign,1,user->ones,&user->Ones));
1016     PetscCall(MatCreateLRC(user->DSG,user->Ones,NULL,user->Ones,&user->JsBlock));
1017     PetscCall(MatSetUp(user->JsBlock));
1018   } else {
1019     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
1020     PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal,ysubnlocal,user->ndesign,user->ndesign,user,&user->JsBlock));
1021     PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateBlockMatMult));
1022     PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateBlockMatMult));
1023   }
1024   PetscCall(MatSetOption(user->JsBlock,MAT_SYMMETRIC,PETSC_TRUE));
1025   PetscCall(MatSetOption(user->JsBlock,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1026   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->Js));
1027   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult));
1028   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMult));
1029   PetscCall(MatSetOption(user->Js,MAT_SYMMETRIC,PETSC_TRUE));
1030   PetscCall(MatSetOption(user->Js,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1031 
1032   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->JsInv));
1033   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateInvMatMult));
1034   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateInvMatMult));
1035   PetscCall(MatSetOption(user->JsInv,MAT_SYMMETRIC,PETSC_TRUE));
1036   PetscCall(MatSetOption(user->JsInv,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1037 
1038   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE));
1039   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1040   /* Now solve for ytrue */
1041   PetscCall(KSPCreate(PETSC_COMM_WORLD,&user->solver));
1042   PetscCall(KSPSetFromOptions(user->solver));
1043 
1044   PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->DSG));
1045 
1046   PetscCall(MatMult(user->JsInv,user->q,user->ytrue));
1047   /* First compute Av_u = Av*exp(-u) */
1048   PetscCall(VecSet(user->uwork,0));
1049   PetscCall(VecAXPY(user->uwork,-1.0,user->u)); /* Note: user->u */
1050   PetscCall(VecExp(user->uwork));
1051   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
1052 
1053   /* Next update DSG = Div*S*Grad  with user->u */
1054   PetscCall(VecCopy(user->Av_u,user->Swork));
1055   PetscCall(VecReciprocal(user->Swork));
1056   if (user->use_ptap) {
1057     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
1058     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG));
1059   } else {
1060     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1061     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Av_u));
1062     PetscCall(MatProductNumeric(user->DSG));
1063   }
1064 
1065   /* Now solve for y */
1066 
1067   PetscCall(MatMult(user->JsInv,user->q,user->y));
1068 
1069   user->ksp_its_initial = user->ksp_its;
1070   user->ksp_its = 0;
1071   /* Construct projection matrix Q (blocks) */
1072   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Q));
1073   PetscCall(MatSetSizes(user->Q,dsubnlocal,ysubnlocal,user->ndata,user->ndesign));
1074   PetscCall(MatSetFromOptions(user->Q));
1075   PetscCall(MatMPIAIJSetPreallocation(user->Q,8,NULL,8,NULL));
1076   PetscCall(MatSeqAIJSetPreallocation(user->Q,8,NULL));
1077 
1078   for (i=0; i<user->mx; i++) {
1079     x[i] = h*(i+0.5);
1080     y[i] = h*(i+0.5);
1081     z[i] = h*(i+0.5);
1082   }
1083   PetscCall(MatGetOwnershipRange(user->Q,&istart,&iend));
1084 
1085   nx = user->mx; ny = user->mx; nz = user->mx;
1086   for (i=istart; i<iend; i++) {
1087 
1088     xri = xr[i];
1089     im = 0;
1090     xim = x[im];
1091     while (xri>xim && im<nx) {
1092       im = im+1;
1093       xim = x[im];
1094     }
1095     indx1 = im-1;
1096     indx2 = im;
1097     dx1 = xri - x[indx1];
1098     dx2 = x[indx2] - xri;
1099 
1100     yri = yr[i];
1101     im = 0;
1102     yim = y[im];
1103     while (yri>yim && im<ny) {
1104       im = im+1;
1105       yim = y[im];
1106     }
1107     indy1 = im-1;
1108     indy2 = im;
1109     dy1 = yri - y[indy1];
1110     dy2 = y[indy2] - yri;
1111 
1112     zri = zr[i];
1113     im = 0;
1114     zim = z[im];
1115     while (zri>zim && im<nz) {
1116       im = im+1;
1117       zim = z[im];
1118     }
1119     indz1 = im-1;
1120     indz2 = im;
1121     dz1 = zri - z[indz1];
1122     dz2 = z[indz2] - zri;
1123 
1124     Dx = x[indx2] - x[indx1];
1125     Dy = y[indy2] - y[indy1];
1126     Dz = z[indz2] - z[indz1];
1127 
1128     j = indx1 + indy1*nx + indz1*nx*ny;
1129     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1130     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1131 
1132     j = indx1 + indy1*nx + indz2*nx*ny;
1133     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1134     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1135 
1136     j = indx1 + indy2*nx + indz1*nx*ny;
1137     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1138     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1139 
1140     j = indx1 + indy2*nx + indz2*nx*ny;
1141     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1142     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1143 
1144     j = indx2 + indy1*nx + indz1*nx*ny;
1145     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1146     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1147 
1148     j = indx2 + indy1*nx + indz2*nx*ny;
1149     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1150     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1151 
1152     j = indx2 + indy2*nx + indz1*nx*ny;
1153     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1154     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1155 
1156     j = indx2 + indy2*nx + indz2*nx*ny;
1157     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1158     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1159   }
1160 
1161   PetscCall(MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY));
1162   PetscCall(MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY));
1163   /* Create MQ (composed of blocks of Q */
1164   PetscCall(MatCreateShell(PETSC_COMM_WORLD,dsubnlocal*user->ns,PETSC_DECIDE,user->ndata*user->ns,user->nstate,user,&user->MQ));
1165   PetscCall(MatShellSetOperation(user->MQ,MATOP_MULT,(void(*)(void))QMatMult));
1166   PetscCall(MatShellSetOperation(user->MQ,MATOP_MULT_TRANSPOSE,(void(*)(void))QMatMultTranspose));
1167 
1168   /* Add noise to the measurement data */
1169   PetscCall(VecSet(user->ywork,1.0));
1170   PetscCall(VecAYPX(user->ywork,user->noise,user->ytrue));
1171   PetscCall(MatMult(user->MQ,user->ywork,user->d));
1172 
1173   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1174   PetscCall(PetscFree(x));
1175   PetscCall(PetscFree(y));
1176   PetscCall(PetscFree(z));
1177   PetscCall(PetscLogStagePop());
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 PetscErrorCode EllipticDestroy(AppCtx *user)
1182 {
1183   PetscInt       i;
1184 
1185   PetscFunctionBegin;
1186   PetscCall(MatDestroy(&user->DSG));
1187   PetscCall(KSPDestroy(&user->solver));
1188   PetscCall(MatDestroy(&user->Q));
1189   PetscCall(MatDestroy(&user->MQ));
1190   if (!user->use_ptap) {
1191     PetscCall(MatDestroy(&user->Div));
1192     PetscCall(MatDestroy(&user->Divwork));
1193   } else {
1194     PetscCall(MatDestroy(&user->Diag));
1195   }
1196   if (user->use_lrc) {
1197     PetscCall(MatDestroy(&user->Ones));
1198   }
1199 
1200   PetscCall(MatDestroy(&user->Grad));
1201   PetscCall(MatDestroy(&user->Av));
1202   PetscCall(MatDestroy(&user->Avwork));
1203   PetscCall(MatDestroy(&user->L));
1204   PetscCall(MatDestroy(&user->Js));
1205   PetscCall(MatDestroy(&user->Jd));
1206   PetscCall(MatDestroy(&user->JsBlock));
1207   PetscCall(MatDestroy(&user->JsInv));
1208 
1209   PetscCall(VecDestroy(&user->x));
1210   PetscCall(VecDestroy(&user->u));
1211   PetscCall(VecDestroy(&user->uwork));
1212   PetscCall(VecDestroy(&user->utrue));
1213   PetscCall(VecDestroy(&user->y));
1214   PetscCall(VecDestroy(&user->ywork));
1215   PetscCall(VecDestroy(&user->ytrue));
1216   PetscCall(VecDestroy(&user->c));
1217   PetscCall(VecDestroy(&user->cwork));
1218   PetscCall(VecDestroy(&user->ur));
1219   PetscCall(VecDestroy(&user->q));
1220   PetscCall(VecDestroy(&user->d));
1221   PetscCall(VecDestroy(&user->dwork));
1222   PetscCall(VecDestroy(&user->lwork));
1223   PetscCall(VecDestroy(&user->S));
1224   PetscCall(VecDestroy(&user->Swork));
1225   PetscCall(VecDestroy(&user->Sdiag));
1226   PetscCall(VecDestroy(&user->Ywork));
1227   PetscCall(VecDestroy(&user->Twork));
1228   PetscCall(VecDestroy(&user->Av_u));
1229   PetscCall(VecDestroy(&user->js_diag));
1230   PetscCall(ISDestroy(&user->s_is));
1231   PetscCall(ISDestroy(&user->d_is));
1232   PetscCall(VecDestroy(&user->suby));
1233   PetscCall(VecDestroy(&user->subd));
1234   PetscCall(VecDestroy(&user->subq));
1235   PetscCall(VecScatterDestroy(&user->state_scatter));
1236   PetscCall(VecScatterDestroy(&user->design_scatter));
1237   for (i=0;i<user->ns;i++) {
1238     PetscCall(VecScatterDestroy(&user->yi_scatter[i]));
1239     PetscCall(VecScatterDestroy(&user->di_scatter[i]));
1240   }
1241   PetscCall(PetscFree(user->yi_scatter));
1242   PetscCall(PetscFree(user->di_scatter));
1243   if (user->use_lrc) {
1244     PetscCall(PetscFree(user->ones));
1245     PetscCall(MatDestroy(&user->Ones));
1246   }
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 PetscErrorCode EllipticMonitor(Tao tao, void *ptr)
1251 {
1252   Vec            X;
1253   PetscReal      unorm,ynorm;
1254   AppCtx         *user = (AppCtx*)ptr;
1255 
1256   PetscFunctionBegin;
1257   PetscCall(TaoGetSolution(tao,&X));
1258   PetscCall(Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
1259   PetscCall(VecAXPY(user->ywork,-1.0,user->ytrue));
1260   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue));
1261   PetscCall(VecNorm(user->uwork,NORM_2,&unorm));
1262   PetscCall(VecNorm(user->ywork,NORM_2,&ynorm));
1263   PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm));
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 /*TEST
1268 
1269    build:
1270       requires: !complex
1271 
1272    test:
1273       args: -tao_cmonitor -ns 1 -tao_type lcl -tao_gatol 1.e-3 -tao_max_it 11
1274       requires: !single
1275 
1276    test:
1277       suffix: 2
1278       args: -tao_cmonitor -tao_type lcl -tao_max_it 11 -use_ptap -use_lrc -ns 1 -tao_gatol 1.e-3
1279       requires: !single
1280 
1281 TEST*/
1282