xref: /libCEED/examples/petsc/bps.c (revision 194c25f79561da6e0105f121e91f96f896b24354)
1 //                        libCEED + PETSc Example: CEED BPs
2 //
3 // This example demonstrates a simple usage of libCEED with PETSc to solve the
4 // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps.
5 //
6 // The code is intentionally "raw", using only low-level communication
7 // primitives.
8 //
9 // Build with:
10 //
11 //     make bps [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>]
12 //
13 // Sample runs:
14 //
15 //     bps -problem bp1
16 //     bps -problem bp2 -ceed /cpu/self
17 //     bps -problem bp3 -ceed /gpu/occa
18 //     bps -problem bp4 -ceed /cpu/occa
19 //     bps -problem bp5 -ceed /omp/occa
20 //     bps -problem bp6 -ceed /ocl/occa
21 //
22 //TESTARGS -ceed {ceed_resource} -test -problem bp3
23 
24 /// @file
25 /// CEED BPs example using PETSc
26 const char help[] = "Solve CEED BPs using PETSc\n";
27 
28 #include <stdbool.h>
29 #include <string.h>
30 #include "common.h"
31 #include "bp1.h"
32 #include "bp2.h"
33 #include "bp3.h"
34 #include "bp4.h"
35 
36 #define PATH(BASE) __DIR__ #BASE
37 
38 static void Split3(PetscInt size, PetscInt m[3], bool reverse) {
39   for (PetscInt d=0,sizeleft=size; d<3; d++) {
40     PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d)));
41     while (try * (sizeleft / try) != sizeleft) try++;
42     m[reverse ? 2-d : d] = try;
43     sizeleft /= try;
44   }
45 }
46 
47 static PetscInt Max3(const PetscInt a[3]) {
48   return PetscMax(a[0], PetscMax(a[1], a[2]));
49 }
50 static PetscInt Min3(const PetscInt a[3]) {
51   return PetscMin(a[0], PetscMin(a[1], a[2]));
52 }
53 static void GlobalDof(const PetscInt p[3], const PetscInt irank[3],
54                       PetscInt degree, const PetscInt melem[3],
55                       PetscInt mdof[3]) {
56   for (int d=0; d<3; d++)
57     mdof[d] = degree*melem[d] + (irank[d] == p[d]-1);
58 }
59 static PetscInt GlobalStart(const PetscInt p[3], const PetscInt irank[3],
60                             PetscInt degree, const PetscInt melem[3]) {
61   PetscInt start = 0;
62   // Dumb brute-force is easier to read
63   for (PetscInt i=0; i<p[0]; i++) {
64     for (PetscInt j=0; j<p[1]; j++) {
65       for (PetscInt k=0; k<p[2]; k++) {
66         PetscInt mdof[3], ijkrank[] = {i,j,k};
67         if (i == irank[0] && j == irank[1] && k == irank[2]) return start;
68         GlobalDof(p, ijkrank, degree, melem, mdof);
69         start += mdof[0] * mdof[1] * mdof[2];
70       }
71     }
72   }
73   return -1;
74 }
75 static int CreateRestriction(Ceed ceed, const CeedInt melem[3],
76                              CeedInt P, CeedInt ncomp,
77                              CeedElemRestriction *Erestrict) {
78   const PetscInt Nelem = melem[0]*melem[1]*melem[2];
79   PetscInt mdof[3], *idx, *idxp;
80 
81   for (int d=0; d<3; d++) mdof[d] = melem[d]*(P-1) + 1;
82   idxp = idx = malloc(Nelem*P*P*P*sizeof idx[0]);
83   for (CeedInt i=0; i<melem[0]; i++) {
84     for (CeedInt j=0; j<melem[1]; j++) {
85       for (CeedInt k=0; k<melem[2]; k++,idxp += P*P*P) {
86         for (CeedInt ii=0; ii<P; ii++) {
87           for (CeedInt jj=0; jj<P; jj++) {
88             for (CeedInt kk=0; kk<P; kk++) {
89               if (0) { // This is the C-style (i,j,k) ordering that I prefer
90                 idxp[(ii*P+jj)*P+kk] = (((i*(P-1)+ii)*mdof[1]
91                                          + (j*(P-1)+jj))*mdof[2]
92                                         + (k*(P-1)+kk));
93               } else { // (k,j,i) ordering for consistency with MFEM example
94                 idxp[ii+P*(jj+P*kk)] = (((i*(P-1)+ii)*mdof[1]
95                                          + (j*(P-1)+jj))*mdof[2]
96                                         + (k*(P-1)+kk));
97               }
98             }
99           }
100         }
101       }
102     }
103   }
104   CeedElemRestrictionCreate(ceed, Nelem, P*P*P, mdof[0]*mdof[1]*mdof[2], ncomp,
105                             CEED_MEM_HOST, CEED_OWN_POINTER, idx, Erestrict);
106   PetscFunctionReturn(0);
107 }
108 
109 // Data for PETSc
110 typedef struct User_ *User;
111 struct User_ {
112   MPI_Comm comm;
113   VecScatter ltog;              // Scatter for all entries
114   VecScatter ltog0;             // Skip Dirichlet values
115   VecScatter gtogD;             // global-to-global; only Dirichlet values
116   Vec Xloc, Yloc;
117   CeedVector xceed, yceed;
118   CeedOperator op;
119   CeedVector rho;
120   Ceed ceed;
121 };
122 
123 // BP Options
124 typedef enum {
125   CEED_BP1 = 0, CEED_BP2 = 1, CEED_BP3 = 2,
126   CEED_BP4 = 3, CEED_BP5 = 4, CEED_BP6 = 5
127 } bpType;
128 static const char *const bpTypes[] = {"bp1","bp2","bp3","bp4","bp5","bp6",
129                                       "bpType","CEED_BP",0};
130 
131 // BP specific data
132 typedef struct {
133   CeedInt vscale, qdatasize, qextra;
134   CeedQFunctionUser setup, apply, error;
135   const char setupfname[PETSC_MAX_PATH_LEN], applyfname[PETSC_MAX_PATH_LEN],
136              errorfname[PETSC_MAX_PATH_LEN];
137   CeedEvalMode inmode, outmode;
138 } bpData;
139 
140 bpData bpOptions[6] = {
141   [CEED_BP1] = {
142     .vscale = 1,
143     .qdatasize = 1,
144     .qextra = 1,
145     .setup = SetupMass,
146     .apply = Mass,
147     .error = Error,
148     .setupfname = PATH(bp1.h:SetupMass),
149     .applyfname = PATH(bp1.h:Mass),
150     .errorfname = PATH(common.h:Error),
151     .inmode = CEED_EVAL_INTERP,
152     .outmode = CEED_EVAL_INTERP},
153   [CEED_BP2] = {
154     .vscale = 3,
155     .qdatasize = 1,
156     .qextra = 1,
157     .setup = SetupMass3,
158     .apply = Mass3,
159     .error = Error3,
160     .setupfname = PATH(bp2.h:SetupMass3),
161     .applyfname = PATH(bp2.h:Mass3),
162     .errorfname = PATH(common.h:Error3),
163     .inmode = CEED_EVAL_INTERP,
164     .outmode = CEED_EVAL_INTERP},
165   [CEED_BP3] = {
166     .vscale = 1,
167     .qdatasize = 6,
168     .qextra = 1,
169     .setup = SetupDiff,
170     .apply = Diff,
171     .error = Error,
172     .setupfname = PATH(bp3.h:SetupDiff),
173     .applyfname = PATH(bp3.h:Diff),
174     .errorfname = PATH(common.h:Error),
175     .inmode = CEED_EVAL_GRAD,
176     .outmode = CEED_EVAL_GRAD},
177   [CEED_BP4] = {
178     .vscale = 3,
179     .qdatasize = 6,
180     .qextra = 1,
181     .setup = SetupDiff3,
182     .apply = Diff3,
183     .error = Error3,
184     .setupfname = PATH(bp4.h:SetupDiff3),
185     .applyfname = PATH(bp4.h:Diff),
186     .errorfname = PATH(common.h:Error3),
187     .inmode = CEED_EVAL_GRAD,
188     .outmode = CEED_EVAL_GRAD},
189   [CEED_BP5] = {
190     .vscale = 1,
191     .qdatasize = 6,
192     .qextra = 0,
193     .setup = SetupDiff,
194     .apply = Diff,
195     .error = Error,
196     .setupfname = PATH(bp3.h:SetupDiff),
197     .applyfname = PATH(bp3.h:Diff),
198     .errorfname = PATH(common.h:Error),
199     .inmode = CEED_EVAL_GRAD,
200     .outmode = CEED_EVAL_GRAD},
201   [CEED_BP6] = {
202     .vscale = 3,
203     .qdatasize = 6,
204     .qextra = 0,
205     .setup = SetupDiff3,
206     .apply = Diff3,
207     .error = Error3,
208     .setupfname = PATH(bp4.h:SetupDiff3),
209     .applyfname = PATH(bp4.h:Diff),
210     .errorfname = PATH(common.h:Error3),
211     .inmode = CEED_EVAL_GRAD,
212     .outmode = CEED_EVAL_GRAD}
213 };
214 
215 // This function uses libCEED to compute the action of the mass matrix
216 static PetscErrorCode MatMult_Mass(Mat A, Vec X, Vec Y) {
217   PetscErrorCode ierr;
218   User user;
219   PetscScalar *x, *y;
220 
221   PetscFunctionBeginUser;
222   ierr = MatShellGetContext(A, &user); CHKERRQ(ierr);
223   ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES,
224                          SCATTER_REVERSE); CHKERRQ(ierr);
225   ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE);
226   CHKERRQ(ierr);
227   ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr);
228 
229   ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr);
230   ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr);
231   CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x);
232   CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y);
233 
234   CeedOperatorApply(user->op, user->xceed, user->yceed,
235                     CEED_REQUEST_IMMEDIATE);
236   ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr);
237 
238   ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr);
239   ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr);
240 
241   if (Y) {
242     ierr = VecZeroEntries(Y); CHKERRQ(ierr);
243     ierr = VecScatterBegin(user->ltog, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD);
244     CHKERRQ(ierr);
245     ierr = VecScatterEnd(user->ltog, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD);
246     CHKERRQ(ierr);
247   }
248   PetscFunctionReturn(0);
249 }
250 
251 // This function uses libCEED to compute the action of the Laplacian with
252 // Dirichlet boundary conditions
253 static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) {
254   PetscErrorCode ierr;
255   User user;
256   PetscScalar *x, *y;
257 
258   PetscFunctionBeginUser;
259   ierr = MatShellGetContext(A, &user); CHKERRQ(ierr);
260   ierr = VecScatterBegin(user->ltog0, X, user->Xloc, INSERT_VALUES,
261                          SCATTER_REVERSE); CHKERRQ(ierr);
262   ierr = VecScatterEnd(user->ltog0, X, user->Xloc, INSERT_VALUES,
263                        SCATTER_REVERSE);
264   CHKERRQ(ierr);
265   ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr);
266 
267   ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr);
268   ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr);
269   CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x);
270   CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y);
271 
272   CeedOperatorApply(user->op, user->xceed, user->yceed,
273                     CEED_REQUEST_IMMEDIATE);
274   ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr);
275 
276   ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr);
277   ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr);
278 
279   ierr = VecZeroEntries(Y); CHKERRQ(ierr);
280   ierr = VecScatterBegin(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD);
281   CHKERRQ(ierr);
282   ierr = VecScatterEnd(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD);
283   CHKERRQ(ierr);
284   ierr = VecScatterBegin(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD);
285   CHKERRQ(ierr);
286   ierr = VecScatterEnd(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD);
287   CHKERRQ(ierr);
288   PetscFunctionReturn(0);
289 }
290 
291 // This function calculates the error in the final solution
292 static PetscErrorCode ComputeErrorMax(User user, CeedOperator op_error, Vec X,
293                                       CeedVector target, PetscReal *maxerror) {
294   PetscErrorCode ierr;
295   PetscScalar *x;
296   CeedVector collocated_error;
297   CeedInt length;
298 
299   PetscFunctionBeginUser;
300   CeedVectorGetLength(target, &length);
301   CeedVectorCreate(user->ceed, length, &collocated_error);
302   ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES,
303                          SCATTER_REVERSE); CHKERRQ(ierr);
304   ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE);
305   CHKERRQ(ierr);
306   ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr);
307   CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x);
308   CeedOperatorApply(op_error, user->xceed, collocated_error,
309                     CEED_REQUEST_IMMEDIATE);
310   VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr);
311 
312   *maxerror = 0;
313   const CeedScalar *e;
314   CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e);
315   for (CeedInt i=0; i<length; i++) {
316     *maxerror = PetscMax(*maxerror, PetscAbsScalar(e[i]));
317   }
318   CeedVectorRestoreArrayRead(collocated_error, &e);
319   ierr = MPI_Allreduce(MPI_IN_PLACE, &maxerror,
320                        1, MPIU_SCALAR, MPIU_MAX, user->comm); CHKERRQ(ierr);
321   CeedVectorDestroy(&collocated_error);
322   PetscFunctionReturn(0);
323 }
324 
325 int main(int argc, char **argv) {
326   PetscInt ierr;
327   MPI_Comm comm;
328   char ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self";
329   PetscInt degree, qextra, localdof, localelem, melem[3], mdof[3], p[3],
330            irank[3], ldof[3], lsize, vscale = 1;
331   PetscScalar *r;
332   PetscBool test_mode, benchmark_mode;
333   PetscMPIInt size, rank;
334   VecScatter ltog, ltog0, gtogD;
335   Ceed ceed;
336   CeedBasis basisx, basisu;
337   CeedElemRestriction Erestrictx, Erestrictu, Erestrictxi, Erestrictui,
338                       Erestrictqdi;
339   CeedQFunction qf_setup, qf_apply, qf_error;
340   CeedOperator op_setup, op_apply, op_error;
341   CeedVector xcoord, rho, rhsceed, target;
342   CeedInt P, Q;
343   Vec X, Xloc, rhs, rhsloc;
344   Mat mat;
345   KSP ksp;
346   User user;
347   double my_rt_start, my_rt, rt_min, rt_max;
348   bpType bpChoice;
349 
350   ierr = PetscInitialize(&argc, &argv, NULL, help);
351   if (ierr) return ierr;
352   comm = PETSC_COMM_WORLD;
353   ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr);
354   bpChoice = CEED_BP1;
355   ierr = PetscOptionsEnum("-problem",
356                           "CEED benchmark problem to solve", NULL,
357                           bpTypes, (PetscEnum)bpChoice, (PetscEnum*)&bpChoice,
358                           NULL); CHKERRQ(ierr);
359   vscale = bpOptions[bpChoice].vscale;
360   test_mode = PETSC_FALSE;
361   ierr = PetscOptionsBool("-test",
362                           "Testing mode (do not print unless error is large)",
363                           NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr);
364   benchmark_mode = PETSC_FALSE;
365   ierr = PetscOptionsBool("-benchmark",
366                           "Benchmarking mode (prints benchmark statistics)",
367                           NULL, benchmark_mode, &benchmark_mode, NULL);
368   CHKERRQ(ierr);
369   degree = test_mode ? 3 : 1;
370   ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis",
371                          NULL, degree, &degree, NULL); CHKERRQ(ierr);
372   qextra = bpOptions[bpChoice].qextra;
373   ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points",
374                          NULL, qextra, &qextra, NULL); CHKERRQ(ierr);
375   ierr = PetscOptionsString("-ceed", "CEED resource specifier",
376                             NULL, ceedresource, ceedresource,
377                             sizeof(ceedresource), NULL); CHKERRQ(ierr);
378   localdof = 1000;
379   ierr = PetscOptionsInt("-local",
380                          "Target number of locally owned degrees of freedom per process",
381                          NULL, localdof, &localdof, NULL); CHKERRQ(ierr);
382   ierr = PetscOptionsEnd(); CHKERRQ(ierr);
383   P = degree + 1;
384   Q = P + qextra;
385 
386   // Determine size of process grid
387   ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr);
388   Split3(size, p, false);
389 
390   // Find a nicely composite number of elements no less than localdof
391   for (localelem = PetscMax(1, localdof / (degree*degree*degree)); ;
392        localelem++) {
393     Split3(localelem, melem, true);
394     if (Max3(melem) / Min3(melem) <= 2) break;
395   }
396 
397   // Find my location in the process grid
398   ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr);
399   for (int d=0,rankleft=rank; d<3; d++) {
400     const int pstride[3] = {p[1] *p[2], p[2], 1};
401     irank[d] = rankleft / pstride[d];
402     rankleft -= irank[d] * pstride[d];
403   }
404 
405   GlobalDof(p, irank, degree, melem, mdof);
406 
407   ierr = VecCreate(comm, &X); CHKERRQ(ierr);
408   ierr = VecSetSizes(X, mdof[0]*mdof[1]*mdof[2]*vscale, PETSC_DECIDE);
409   CHKERRQ(ierr);
410   ierr = VecSetUp(X); CHKERRQ(ierr);
411 
412   if (!test_mode) {
413     CeedInt gsize;
414     ierr = VecGetSize(X, &gsize); CHKERRQ(ierr);
415     ierr = PetscPrintf(comm,
416                        "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n"
417                        "  libCEED:\n"
418                        "    libCEED Backend                    : %s\n"
419                        "  Mesh:\n"
420                        "    Number of 1D Basis Nodes (p)       : %d\n"
421                        "    Number of 1D Quadrature Points (q) : %d\n"
422                        "    Global DOFs                        : %D\n"
423                        "    Process Decomposition              : %D %D %D\n"
424                        "    Local Elements                     : %D = %D %D %D\n"
425                        "    Owned DOFs                         : %D = %D %D %D\n",
426                        bpChoice+1, ceedresource, P, Q,  gsize/vscale, p[0],
427                        p[1], p[2], localelem, melem[0], melem[1], melem[2],
428                        mdof[0]*mdof[1]*mdof[2], mdof[0], mdof[1], mdof[2]);
429     CHKERRQ(ierr);
430   }
431 
432   {
433     lsize = 1;
434     for (int d=0; d<3; d++) {
435       ldof[d] = melem[d]*degree + 1;
436       lsize *= ldof[d];
437     }
438     ierr = VecCreate(PETSC_COMM_SELF, &Xloc); CHKERRQ(ierr);
439     ierr = VecSetSizes(Xloc, lsize*vscale, PETSC_DECIDE); CHKERRQ(ierr);
440     ierr = VecSetUp(Xloc); CHKERRQ(ierr);
441 
442     // Create local-to-global scatter
443     PetscInt *ltogind, *ltogind0, *locind, l0count;
444     IS ltogis, ltogis0, locis;
445     PetscInt gstart[2][2][2], gmdof[2][2][2][3];
446 
447     for (int i=0; i<2; i++) {
448       for (int j=0; j<2; j++) {
449         for (int k=0; k<2; k++) {
450           PetscInt ijkrank[3] = {irank[0]+i, irank[1]+j, irank[2]+k};
451           gstart[i][j][k] = GlobalStart(p, ijkrank, degree, melem);
452           GlobalDof(p, ijkrank, degree, melem, gmdof[i][j][k]);
453         }
454       }
455     }
456 
457     ierr = PetscMalloc1(lsize, &ltogind); CHKERRQ(ierr);
458     ierr = PetscMalloc1(lsize, &ltogind0); CHKERRQ(ierr);
459     ierr = PetscMalloc1(lsize, &locind); CHKERRQ(ierr);
460     l0count = 0;
461     for (PetscInt i=0,ir,ii; ir=i>=mdof[0], ii=i-ir*mdof[0], i<ldof[0]; i++) {
462       for (PetscInt j=0,jr,jj; jr=j>=mdof[1], jj=j-jr*mdof[1], j<ldof[1]; j++) {
463         for (PetscInt k=0,kr,kk; kr=k>=mdof[2], kk=k-kr*mdof[2], k<ldof[2]; k++) {
464           PetscInt here = (i*ldof[1]+j)*ldof[2]+k;
465           ltogind[here] =
466             gstart[ir][jr][kr] + (ii*gmdof[ir][jr][kr][1]+jj)*gmdof[ir][jr][kr][2]+kk;
467           if ((irank[0] == 0 && i == 0)
468               || (irank[1] == 0 && j == 0)
469               || (irank[2] == 0 && k == 0)
470               || (irank[0]+1 == p[0] && i+1 == ldof[0])
471               || (irank[1]+1 == p[1] && j+1 == ldof[1])
472               || (irank[2]+1 == p[2] && k+1 == ldof[2]))
473             continue;
474           ltogind0[l0count] = ltogind[here];
475           locind[l0count++] = here;
476         }
477       }
478     }
479     ierr = ISCreateBlock(comm, vscale, lsize, ltogind, PETSC_OWN_POINTER,
480                          &ltogis); CHKERRQ(ierr);
481     ierr = VecScatterCreate(Xloc, NULL, X, ltogis, &ltog); CHKERRQ(ierr);
482     CHKERRQ(ierr);
483     ierr = ISCreateBlock(comm, vscale, l0count, ltogind0, PETSC_OWN_POINTER,
484                          &ltogis0); CHKERRQ(ierr);
485     ierr = ISCreateBlock(comm, vscale, l0count, locind, PETSC_OWN_POINTER,
486                          &locis); CHKERRQ(ierr);
487     ierr = VecScatterCreate(Xloc, locis, X, ltogis0, &ltog0); CHKERRQ(ierr);
488     {
489       // Create global-to-global scatter for Dirichlet values (everything not in
490       // ltogis0, which is the range of ltog0)
491       PetscInt xstart, xend, *indD, countD = 0;
492       IS isD;
493       const PetscScalar *x;
494       ierr = VecZeroEntries(Xloc); CHKERRQ(ierr);
495       ierr = VecSet(X, 1.0); CHKERRQ(ierr);
496       ierr = VecScatterBegin(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD);
497       CHKERRQ(ierr);
498       ierr = VecScatterEnd(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD);
499       CHKERRQ(ierr);
500       ierr = VecGetOwnershipRange(X, &xstart, &xend); CHKERRQ(ierr);
501       ierr = PetscMalloc1(xend-xstart, &indD); CHKERRQ(ierr);
502       ierr = VecGetArrayRead(X, &x); CHKERRQ(ierr);
503       for (PetscInt i=0; i<xend-xstart; i++) {
504         if (x[i] == 1.) indD[countD++] = xstart + i;
505       }
506       ierr = VecRestoreArrayRead(X, &x); CHKERRQ(ierr);
507       ierr = ISCreateGeneral(comm, countD, indD, PETSC_COPY_VALUES, &isD);
508       CHKERRQ(ierr);
509       ierr = PetscFree(indD); CHKERRQ(ierr);
510       ierr = VecScatterCreate(X, isD, X, isD, &gtogD); CHKERRQ(ierr);
511       ierr = ISDestroy(&isD); CHKERRQ(ierr);
512     }
513     ierr = ISDestroy(&ltogis); CHKERRQ(ierr);
514     ierr = ISDestroy(&ltogis0); CHKERRQ(ierr);
515     ierr = ISDestroy(&locis); CHKERRQ(ierr);
516   }
517 
518   // Set up libCEED
519   CeedInit(ceedresource, &ceed);
520   CeedBasisCreateTensorH1Lagrange(ceed, 3, vscale, P, Q, CEED_GAUSS, &basisu);
521   CeedBasisCreateTensorH1Lagrange(ceed, 3, 3, 2, Q, CEED_GAUSS, &basisx);
522 
523   CreateRestriction(ceed, melem, P, vscale, &Erestrictu);
524   CreateRestriction(ceed, melem, 2, 3, &Erestrictx);
525   CeedInt nelem = melem[0]*melem[1]*melem[2];
526   CeedElemRestrictionCreateIdentity(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, vscale,
527                                     &Erestrictui);
528   CeedElemRestrictionCreateIdentity(ceed, nelem,
529                                     bpOptions[bpChoice].qdatasize*Q*Q*Q,
530                                     bpOptions[bpChoice].qdatasize*nelem*Q*Q*Q,
531                                     1, &Erestrictqdi);
532   CeedElemRestrictionCreateIdentity(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, 1,
533                                     &Erestrictxi);
534   {
535     CeedScalar *xloc;
536     CeedInt shape[3] = {melem[0]+1, melem[1]+1, melem[2]+1}, len =
537                          shape[0]*shape[1]*shape[2];
538     xloc = malloc(len*3*sizeof xloc[0]);
539     for (CeedInt i=0; i<shape[0]; i++) {
540       for (CeedInt j=0; j<shape[1]; j++) {
541         for (CeedInt k=0; k<shape[2]; k++) {
542           xloc[((i*shape[1]+j)*shape[2]+k) + 0*len] = 1.*(irank[0]*melem[0]+i) /
543               (p[0]*melem[0]);
544           xloc[((i*shape[1]+j)*shape[2]+k) + 1*len] = 1.*(irank[1]*melem[1]+j) /
545               (p[1]*melem[1]);
546           xloc[((i*shape[1]+j)*shape[2]+k) + 2*len] = 1.*(irank[2]*melem[2]+k) /
547               (p[2]*melem[2]);
548         }
549       }
550     }
551     CeedVectorCreate(ceed, len*3, &xcoord);
552     CeedVectorSetArray(xcoord, CEED_MEM_HOST, CEED_OWN_POINTER, xloc);
553   }
554 
555   // Create the Q-function that builds the operator (i.e. computes its
556   // quadrature data) and set its context data
557   CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].setup,
558                               bpOptions[bpChoice].setupfname, &qf_setup);
559   CeedQFunctionAddInput(qf_setup, "x", 3, CEED_EVAL_INTERP);
560   CeedQFunctionAddInput(qf_setup, "dx", 3, CEED_EVAL_GRAD);
561   CeedQFunctionAddInput(qf_setup, "weight", 1, CEED_EVAL_WEIGHT);
562   CeedQFunctionAddOutput(qf_setup, "rho", bpOptions[bpChoice].qdatasize,
563                          CEED_EVAL_NONE);
564   CeedQFunctionAddOutput(qf_setup, "true_soln", vscale, CEED_EVAL_NONE);
565   CeedQFunctionAddOutput(qf_setup, "rhs", vscale, CEED_EVAL_INTERP);
566 
567   // Set up PDE operator
568   CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].apply,
569                               bpOptions[bpChoice].applyfname, &qf_apply);
570   // Add inputs and outputs
571   CeedQFunctionAddInput(qf_apply, "u", vscale, bpOptions[bpChoice].inmode);
572   CeedQFunctionAddInput(qf_apply, "rho", bpOptions[bpChoice].qdatasize,
573                         CEED_EVAL_NONE);
574   CeedQFunctionAddOutput(qf_apply, "v", vscale, bpOptions[bpChoice].outmode);
575 
576   // Create the error qfunction
577   CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].error,
578                               bpOptions[bpChoice].errorfname, &qf_error);
579   CeedQFunctionAddInput(qf_error, "u", vscale, CEED_EVAL_INTERP);
580   CeedQFunctionAddInput(qf_error, "true_soln", vscale, CEED_EVAL_NONE);
581   CeedQFunctionAddOutput(qf_error, "error", vscale, CEED_EVAL_NONE);
582 
583   // Create the persistent vectors that will be needed in setup
584   CeedInt Nqpts, Nelem = melem[0]*melem[1]*melem[2];
585   CeedBasisGetNumQuadraturePoints(basisu, &Nqpts);
586   CeedVectorCreate(ceed, bpOptions[bpChoice].qdatasize*Nelem*Nqpts, &rho);
587   CeedVectorCreate(ceed, Nelem*Nqpts*vscale, &target);
588   CeedVectorCreate(ceed, lsize*vscale, &rhsceed);
589 
590   // Create the operator that builds the quadrature data for the ceed operator
591   CeedOperatorCreate(ceed, qf_setup, NULL, NULL, &op_setup);
592   CeedOperatorSetField(op_setup, "x", Erestrictx, CEED_NOTRANSPOSE,
593                        basisx, CEED_VECTOR_ACTIVE);
594   CeedOperatorSetField(op_setup, "dx", Erestrictx, CEED_NOTRANSPOSE,
595                        basisx, CEED_VECTOR_ACTIVE);
596   CeedOperatorSetField(op_setup, "weight", Erestrictxi, CEED_NOTRANSPOSE,
597                        basisx, CEED_VECTOR_NONE);
598   CeedOperatorSetField(op_setup, "rho", Erestrictqdi, CEED_NOTRANSPOSE,
599                        CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
600   CeedOperatorSetField(op_setup, "true_soln", Erestrictui, CEED_NOTRANSPOSE,
601                        CEED_BASIS_COLLOCATED, target);
602   CeedOperatorSetField(op_setup, "rhs", Erestrictu, CEED_TRANSPOSE,
603                        basisu, rhsceed);
604 
605   // Create the mass or diff operator
606   CeedOperatorCreate(ceed, qf_apply, NULL, NULL, &op_apply);
607   CeedOperatorSetField(op_apply, "u", Erestrictu, CEED_TRANSPOSE,
608                        basisu, CEED_VECTOR_ACTIVE);
609   CeedOperatorSetField(op_apply, "rho", Erestrictqdi, CEED_NOTRANSPOSE,
610                        CEED_BASIS_COLLOCATED, rho);
611   CeedOperatorSetField(op_apply, "v", Erestrictu, CEED_TRANSPOSE,
612                        basisu, CEED_VECTOR_ACTIVE);
613 
614   // Create the error operator
615   CeedOperatorCreate(ceed, qf_error, NULL, NULL, &op_error);
616   CeedOperatorSetField(op_error, "u", Erestrictu, CEED_TRANSPOSE,
617                        basisu, CEED_VECTOR_ACTIVE);
618   CeedOperatorSetField(op_error, "true_soln", Erestrictui, CEED_NOTRANSPOSE,
619                        CEED_BASIS_COLLOCATED, target);
620   CeedOperatorSetField(op_error, "error", Erestrictui, CEED_NOTRANSPOSE,
621                        CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
622 
623 
624   // Set up Mat
625   ierr = PetscMalloc1(1, &user); CHKERRQ(ierr);
626   user->comm = comm;
627   user->ltog = ltog;
628   if (bpChoice != CEED_BP1 && bpChoice != CEED_BP2) {
629     user->ltog0 = ltog0;
630     user->gtogD = gtogD;
631   }
632   user->Xloc = Xloc;
633   ierr = VecDuplicate(Xloc, &user->Yloc); CHKERRQ(ierr);
634   CeedVectorCreate(ceed, lsize*vscale, &user->xceed);
635   CeedVectorCreate(ceed, lsize*vscale, &user->yceed);
636   user->op = op_apply;
637   user->rho = rho;
638   user->ceed = ceed;
639 
640   ierr = MatCreateShell(comm, mdof[0]*mdof[1]*mdof[2]*vscale,
641                         mdof[0]*mdof[1]*mdof[2]*vscale,
642                         PETSC_DECIDE, PETSC_DECIDE, user, &mat); CHKERRQ(ierr);
643   if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) {
644     ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Mass);
645     CHKERRQ(ierr);
646   } else {
647     ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Diff);
648     CHKERRQ(ierr);
649   }
650   ierr = MatCreateVecs(mat, &rhs, NULL); CHKERRQ(ierr);
651 
652   // Get RHS vector
653   ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr);
654   ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr);
655   ierr = VecGetArray(rhsloc, &r); CHKERRQ(ierr);
656   CeedVectorSetArray(rhsceed, CEED_MEM_HOST, CEED_USE_POINTER, r);
657 
658   // Setup rho, rhs, and target
659   CeedOperatorApply(op_setup, xcoord, rho, CEED_REQUEST_IMMEDIATE);
660   ierr = CeedVectorSyncArray(rhsceed, CEED_MEM_HOST); CHKERRQ(ierr);
661   CeedVectorDestroy(&xcoord);
662 
663   // Gather RHS
664   ierr = VecRestoreArray(rhsloc, &r); CHKERRQ(ierr);
665   ierr = VecZeroEntries(rhs); CHKERRQ(ierr);
666   ierr = VecScatterBegin(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD);
667   CHKERRQ(ierr);
668   ierr = VecScatterEnd(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD);
669   CHKERRQ(ierr);
670   CeedVectorDestroy(&rhsceed);
671 
672   ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr);
673   {
674     PC pc;
675     ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr);
676     if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) {
677       ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr);
678       ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr);
679     } else {
680       ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr);
681     }
682     ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr);
683     ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr);
684     ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT,
685                             PETSC_DEFAULT); CHKERRQ(ierr);
686   }
687   ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr);
688   ierr = KSPSetOperators(ksp, mat, mat); CHKERRQ(ierr);
689   // First run, if benchmarking
690   if (benchmark_mode) {
691     ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1);
692     CHKERRQ(ierr);
693     my_rt_start = MPI_Wtime();
694     ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr);
695     my_rt = MPI_Wtime() - my_rt_start;
696     // Set maxits based on first iteration timing
697     if (my_rt > 0.02) {
698       ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5);
699       CHKERRQ(ierr);
700     } else {
701       ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20);
702       CHKERRQ(ierr);
703     }
704   }
705   // Timed solve
706   my_rt_start = MPI_Wtime();
707   ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr);
708   my_rt = MPI_Wtime() - my_rt_start;
709   {
710     KSPType ksptype;
711     KSPConvergedReason reason;
712     PetscReal rnorm;
713     PetscInt its;
714     ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr);
715     ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr);
716     ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
717     ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr);
718     if (!test_mode || reason < 0 || rnorm > 1e-8) {
719       ierr = PetscPrintf(comm,
720                          "  KSP:\n"
721                          "    KSP Type                           : %s\n"
722                          "    KSP Convergence                    : %s\n"
723                          "    Total KSP Iterations               : %D\n"
724                          "    Final rnorm                        : %e\n",
725                          ksptype, KSPConvergedReasons[reason], its,
726                          (double)rnorm); CHKERRQ(ierr);
727     }
728     if (benchmark_mode && (!test_mode)) {
729       CeedInt gsize;
730       ierr = VecGetSize(X, &gsize); CHKERRQ(ierr);
731       MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, comm);
732       MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
733       ierr = PetscPrintf(comm,
734                          "  Performance:\n"
735                          "    CG Solve Time                      : %g (%g) sec\n"
736                          "    DOFs/Sec in CG                     : %g (%g) million\n",
737                          rt_max, rt_min, 1e-6*gsize*its/rt_max,
738                          1e-6*gsize*its/rt_min); CHKERRQ(ierr);
739     }
740   }
741 
742   {
743     PetscReal maxerror;
744     ierr = ComputeErrorMax(user, op_error, X, target, &maxerror); CHKERRQ(ierr);
745     PetscReal tol = (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) ? 5e-3 : 5e-2;
746     if (!test_mode || maxerror > tol) {
747       ierr = PetscPrintf(comm,
748                          "    Pointwise Error (max)              : %e\n",
749                          (double)maxerror); CHKERRQ(ierr);
750     }
751   }
752 
753   ierr = VecDestroy(&rhs); CHKERRQ(ierr);
754   ierr = VecDestroy(&rhsloc); CHKERRQ(ierr);
755   ierr = VecDestroy(&X); CHKERRQ(ierr);
756   ierr = VecDestroy(&user->Xloc); CHKERRQ(ierr);
757   ierr = VecDestroy(&user->Yloc); CHKERRQ(ierr);
758   ierr = VecScatterDestroy(&ltog); CHKERRQ(ierr);
759   ierr = VecScatterDestroy(&ltog0); CHKERRQ(ierr);
760   ierr = VecScatterDestroy(&gtogD); CHKERRQ(ierr);
761   ierr = MatDestroy(&mat); CHKERRQ(ierr);
762   ierr = KSPDestroy(&ksp); CHKERRQ(ierr);
763 
764   CeedVectorDestroy(&user->xceed);
765   CeedVectorDestroy(&user->yceed);
766   CeedVectorDestroy(&user->rho);
767   CeedVectorDestroy(&target);
768   CeedOperatorDestroy(&op_setup);
769   CeedOperatorDestroy(&op_apply);
770   CeedOperatorDestroy(&op_error);
771   CeedElemRestrictionDestroy(&Erestrictu);
772   CeedElemRestrictionDestroy(&Erestrictx);
773   CeedElemRestrictionDestroy(&Erestrictui);
774   CeedElemRestrictionDestroy(&Erestrictxi);
775   CeedElemRestrictionDestroy(&Erestrictqdi);
776   CeedQFunctionDestroy(&qf_setup);
777   CeedQFunctionDestroy(&qf_apply);
778   CeedQFunctionDestroy(&qf_error);
779   CeedBasisDestroy(&basisu);
780   CeedBasisDestroy(&basisx);
781   CeedDestroy(&ceed);
782   ierr = PetscFree(user); CHKERRQ(ierr);
783   return PetscFinalize();
784 }
785