xref: /libCEED/examples/petsc/bpsraw.c (revision d779c86afab13ea131a898b576e8fb4eb3b4c19b)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 //                        libCEED + PETSc Example: CEED BPs
18 //
19 // This example demonstrates a simple usage of libCEED with PETSc to solve the
20 // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps.
21 //
22 // The code is intentionally "raw", using only low-level communication
23 // primitives.
24 //
25 // Build with:
26 //
27 //     make bpsraw [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>]
28 //
29 // Sample runs:
30 //
31 //     ./bpsraw -problem bp1
32 //     ./bpsraw -problem bp2
33 //     ./bpsraw -problem bp3
34 //     ./bpsraw -problem bp4
35 //     ./bpsraw -problem bp5 -ceed /cpu/self
36 //     ./bpsraw -problem bp6 -ceed /gpu/cuda
37 //
38 //TESTARGS -ceed {ceed_resource} -test -problem bp2 -degree 5 -qextra 1 -ksp_max_it_clip 15,15
39 
40 /// @file
41 /// CEED BPs example using PETSc
42 /// See bps.c for an implementation using DMPlex unstructured grids.
const char help[] = "Solve CEED BPs using PETSc\n"; // usage string passed to PetscInitialize()
44 
45 #include <stdbool.h>
46 #include <string.h>
47 #include <petscksp.h>
48 #include <ceed.h>
49 #include "qfunctions/bps/common.h"
50 #include "qfunctions/bps/bp1.h"
51 #include "qfunctions/bps/bp2.h"
52 #include "qfunctions/bps/bp3.h"
53 #include "qfunctions/bps/bp4.h"
54 
55 #include <petscsys.h>
56 #if PETSC_VERSION_LT(3,12,0)
57 #ifdef PETSC_HAVE_CUDA
58 #include <petsccuda.h>
59 // Note: With PETSc prior to version 3.12.0, providing the source path to
60 //       include 'cublas_v2.h' will be needed to use 'petsccuda.h'.
61 #endif
62 #endif
63 
64 static CeedMemType MemTypeP2C(PetscMemType mtype) {
65   return PetscMemTypeDevice(mtype) ? CEED_MEM_DEVICE : CEED_MEM_HOST;
66 }
67 
68 static void Split3(PetscInt size, PetscInt m[3], bool reverse) {
69   for (PetscInt d=0,sizeleft=size; d<3; d++) {
70     PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d)));
71     while (try * (sizeleft / try) != sizeleft) try++;
72     m[reverse ? 2-d : d] = try;
73     sizeleft /= try;
74   }
75 }
76 
77 static PetscInt Max3(const PetscInt a[3]) {
78   return PetscMax(a[0], PetscMax(a[1], a[2]));
79 }
80 static PetscInt Min3(const PetscInt a[3]) {
81   return PetscMin(a[0], PetscMin(a[1], a[2]));
82 }
83 static void GlobalNodes(const PetscInt p[3], const PetscInt irank[3],
84                         PetscInt degree, const PetscInt melem[3],
85                         PetscInt mnodes[3]) {
86   for (int d=0; d<3; d++)
87     mnodes[d] = degree*melem[d] + (irank[d] == p[d]-1);
88 }
89 static PetscInt GlobalStart(const PetscInt p[3], const PetscInt irank[3],
90                             PetscInt degree, const PetscInt melem[3]) {
91   PetscInt start = 0;
92   // Dumb brute-force is easier to read
93   for (PetscInt i=0; i<p[0]; i++) {
94     for (PetscInt j=0; j<p[1]; j++) {
95       for (PetscInt k=0; k<p[2]; k++) {
96         PetscInt mnodes[3], ijkrank[] = {i,j,k};
97         if (i == irank[0] && j == irank[1] && k == irank[2]) return start;
98         GlobalNodes(p, ijkrank, degree, melem, mnodes);
99         start += mnodes[0] * mnodes[1] * mnodes[2];
100       }
101     }
102   }
103   return -1;
104 }
105 static int CreateRestriction(Ceed ceed, const CeedInt melem[3], CeedInt P,
106                              CeedInt ncomp, CeedElemRestriction *Erestrict) {
107   const PetscInt nelem = melem[0]*melem[1]*melem[2];
108   PetscInt mnodes[3], *idx, *idxp;
109 
110   // Get indicies
111   for (int d=0; d<3; d++) mnodes[d] = melem[d]*(P-1) + 1;
112   idxp = idx = malloc(nelem*P*P*P*sizeof idx[0]);
113   for (CeedInt i=0; i<melem[0]; i++)
114     for (CeedInt j=0; j<melem[1]; j++)
115       for (CeedInt k=0; k<melem[2]; k++,idxp += P*P*P)
116         for (CeedInt ii=0; ii<P; ii++)
117           for (CeedInt jj=0; jj<P; jj++)
118             for (CeedInt kk=0; kk<P; kk++) {
119               if (0) { // This is the C-style (i,j,k) ordering that I prefer
120                 idxp[(ii*P+jj)*P+kk] = ncomp*(((i*(P-1)+ii)*mnodes[1]
121                                                + (j*(P-1)+jj))*mnodes[2]
122                                               + (k*(P-1)+kk));
123               } else { // (k,j,i) ordering for consistency with MFEM example
124                 idxp[ii+P*(jj+P*kk)] = ncomp*(((i*(P-1)+ii)*mnodes[1]
125                                                + (j*(P-1)+jj))*mnodes[2]
126                                               + (k*(P-1)+kk));
127               }
128             }
129 
130   // Setup CEED restriction
131   CeedElemRestrictionCreate(ceed, nelem, P*P*P, ncomp, 1,
132                             mnodes[0]*mnodes[1]*mnodes[2]*ncomp,
133                             CEED_MEM_HOST, CEED_OWN_POINTER, idx, Erestrict);
134 
135   PetscFunctionReturn(0);
136 }
137 
// Data for PETSc
// Application context handed to the matrix-free MatMult callbacks and to
// ComputeErrorMax; bundles the scatters, local work vectors, and the libCEED
// objects they drive.
typedef struct User_ *User;
struct User_ {
  MPI_Comm comm;
  VecScatter ltog;              // Scatter for all entries
  VecScatter ltog0;             // Skip Dirichlet values
  VecScatter gtogD;             // global-to-global; only Dirichlet values
  Vec Xloc, Yloc;               // Per-rank local work vectors
  CeedVector xceed, yceed;      // libCEED wrappers around Xloc/Yloc arrays
  CeedOperator op;              // Operator applied in MatMult
  CeedVector qdata;             // Quadrature data held for op
  Ceed ceed;
};
151 
// BP Options
// Benchmark problem selector; values index into the bpOptions table below.
typedef enum {
  CEED_BP1 = 0, CEED_BP2 = 1, CEED_BP3 = 2,
  CEED_BP4 = 3, CEED_BP5 = 4, CEED_BP6 = 5
} bpType;
// Strings for PetscOptionsEnum: the option values, then the type name,
// the enum prefix, and a terminating NULL.
static const char *const bpTypes[] = {"bp1","bp2","bp3","bp4","bp5","bp6",
                                      "bpType","CEED_BP",0
                                     };
160 
// BP specific data
// Per-problem configuration consumed when building the QFunctions/operators.
typedef struct {
  CeedInt ncompu, qdatasize, qextra;   // solution components, qdata size, extra quadrature points
  CeedQFunctionUser setupgeo, setuprhs, apply, error;  // QFunction callbacks
  const char *setupgeofname, *setuprhsfname, *applyfname, *errorfname; // QFunction source locations
  CeedEvalMode inmode, outmode;        // eval modes for the apply QFunction's u/v fields
  CeedQuadMode qmode;                  // quadrature rule for the bases
} bpData;
169 
// Per-problem settings, indexed by bpType.  BP1/BP2 use the Mass QFunctions,
// BP3-BP6 the Diff QFunctions; the *3 variants act on 3 components, and
// BP5/BP6 use Gauss-Lobatto quadrature with no extra quadrature points.
bpData bpOptions[6] = {
  [CEED_BP1] = {
    .ncompu = 1,
    .qdatasize = 1,
    .qextra = 1,
    .setupgeo = SetupMassGeo,
    .setuprhs = SetupMassRhs,
    .apply = Mass,
    .error = Error,
    .setupgeofname = SetupMassGeo_loc,
    .setuprhsfname = SetupMassRhs_loc,
    .applyfname = Mass_loc,
    .errorfname = Error_loc,
    .inmode = CEED_EVAL_INTERP,
    .outmode = CEED_EVAL_INTERP,
    .qmode = CEED_GAUSS
  },
  [CEED_BP2] = {
    .ncompu = 3,
    .qdatasize = 1,
    .qextra = 1,
    .setupgeo = SetupMassGeo,
    .setuprhs = SetupMassRhs3,
    .apply = Mass3,
    .error = Error3,
    .setupgeofname = SetupMassGeo_loc,
    .setuprhsfname = SetupMassRhs3_loc,
    .applyfname = Mass3_loc,
    .errorfname = Error3_loc,
    .inmode = CEED_EVAL_INTERP,
    .outmode = CEED_EVAL_INTERP,
    .qmode = CEED_GAUSS
  },
  [CEED_BP3] = {
    .ncompu = 1,
    .qdatasize = 6,
    .qextra = 1,
    .setupgeo = SetupDiffGeo,
    .setuprhs = SetupDiffRhs,
    .apply = Diff,
    .error = Error,
    .setupgeofname = SetupDiffGeo_loc,
    .setuprhsfname = SetupDiffRhs_loc,
    .applyfname = Diff_loc,
    .errorfname = Error_loc,
    .inmode = CEED_EVAL_GRAD,
    .outmode = CEED_EVAL_GRAD,
    .qmode = CEED_GAUSS
  },
  [CEED_BP4] = {
    .ncompu = 3,
    .qdatasize = 6,
    .qextra = 1,
    .setupgeo = SetupDiffGeo,
    .setuprhs = SetupDiffRhs3,
    .apply = Diff3,
    .error = Error3,
    .setupgeofname = SetupDiffGeo_loc,
    .setuprhsfname = SetupDiffRhs3_loc,
    .applyfname = Diff3_loc,
    .errorfname = Error3_loc,
    .inmode = CEED_EVAL_GRAD,
    .outmode = CEED_EVAL_GRAD,
    .qmode = CEED_GAUSS
  },
  [CEED_BP5] = {
    .ncompu = 1,
    .qdatasize = 6,
    .qextra = 0,
    .setupgeo = SetupDiffGeo,
    .setuprhs = SetupDiffRhs,
    .apply = Diff,
    .error = Error,
    .setupgeofname = SetupDiffGeo_loc,
    .setuprhsfname = SetupDiffRhs_loc,
    .applyfname = Diff_loc,
    .errorfname = Error_loc,
    .inmode = CEED_EVAL_GRAD,
    .outmode = CEED_EVAL_GRAD,
    .qmode = CEED_GAUSS_LOBATTO
  },
  [CEED_BP6] = {
    .ncompu = 3,
    .qdatasize = 6,
    .qextra = 0,
    .setupgeo = SetupDiffGeo,
    .setuprhs = SetupDiffRhs3,
    .apply = Diff3,
    .error = Error3,
    .setupgeofname = SetupDiffGeo_loc,
    .setuprhsfname = SetupDiffRhs3_loc,
    .applyfname = Diff3_loc,
    .errorfname = Error3_loc,
    .inmode = CEED_EVAL_GRAD,
    .outmode = CEED_EVAL_GRAD,
    .qmode = CEED_GAUSS_LOBATTO
  }
};
268 
269 // This function uses libCEED to compute the action of the mass matrix
270 static PetscErrorCode MatMult_Mass(Mat A, Vec X, Vec Y) {
271   PetscErrorCode ierr;
272   User user;
273   PetscScalar *x, *y;
274   PetscMemType xmemtype, ymemtype;
275 
276   PetscFunctionBeginUser;
277 
278   ierr = MatShellGetContext(A, &user); CHKERRQ(ierr);
279 
280   // Global-to-local
281   ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES,
282                          SCATTER_REVERSE); CHKERRQ(ierr);
283   ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES,
284                        SCATTER_REVERSE); CHKERRQ(ierr);
285 
286   // Setup libCEED vectors
287   ierr = VecGetArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x,
288                                    &xmemtype); CHKERRQ(ierr);
289   ierr = VecGetArrayAndMemType(user->Yloc, &y, &ymemtype); CHKERRQ(ierr);
290   CeedVectorSetArray(user->xceed, MemTypeP2C(xmemtype), CEED_USE_POINTER, x);
291   CeedVectorSetArray(user->yceed, MemTypeP2C(ymemtype), CEED_USE_POINTER, y);
292 
293   // Apply libCEED operator
294   CeedOperatorApply(user->op, user->xceed, user->yceed,
295                     CEED_REQUEST_IMMEDIATE);
296 
297   // Restore PETSc vectors
298   CeedVectorTakeArray(user->xceed, MemTypeP2C(xmemtype), NULL);
299   CeedVectorTakeArray(user->yceed, MemTypeP2C(ymemtype), NULL);
300   ierr = VecRestoreArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x);
301   CHKERRQ(ierr);
302   ierr = VecRestoreArrayAndMemType(user->Yloc, &y); CHKERRQ(ierr);
303 
304   // Local-to-global
305   if (Y) {
306     ierr = VecZeroEntries(Y); CHKERRQ(ierr);
307     ierr = VecScatterBegin(user->ltog, user->Yloc, Y, ADD_VALUES,
308                            SCATTER_FORWARD); CHKERRQ(ierr);
309     ierr = VecScatterEnd(user->ltog, user->Yloc, Y, ADD_VALUES,
310                          SCATTER_FORWARD); CHKERRQ(ierr);
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 // This function uses libCEED to compute the action of the Laplacian with
316 // Dirichlet boundary conditions
317 static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) {
318   PetscErrorCode ierr;
319   User user;
320   PetscScalar *x, *y;
321   PetscMemType xmemtype, ymemtype;
322 
323   PetscFunctionBeginUser;
324 
325   ierr = MatShellGetContext(A, &user); CHKERRQ(ierr);
326 
327   // Global-to-local
328   ierr = VecScatterBegin(user->ltog0, X, user->Xloc, INSERT_VALUES,
329                          SCATTER_REVERSE); CHKERRQ(ierr);
330   ierr = VecScatterEnd(user->ltog0, X, user->Xloc, INSERT_VALUES,
331                        SCATTER_REVERSE);
332   CHKERRQ(ierr);
333 
334   // Setup libCEED vectors
335   ierr = VecGetArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x,
336                                    &xmemtype); CHKERRQ(ierr);
337   ierr = VecGetArrayAndMemType(user->Yloc, &y, &ymemtype); CHKERRQ(ierr);
338   CeedVectorSetArray(user->xceed, MemTypeP2C(xmemtype), CEED_USE_POINTER, x);
339   CeedVectorSetArray(user->yceed, MemTypeP2C(ymemtype), CEED_USE_POINTER, y);
340 
341   // Apply libCEED operator
342   CeedOperatorApply(user->op, user->xceed, user->yceed,
343                     CEED_REQUEST_IMMEDIATE);
344 
345   // Restore PETSc vectors
346   CeedVectorTakeArray(user->xceed, MemTypeP2C(xmemtype), NULL);
347   CeedVectorTakeArray(user->yceed, MemTypeP2C(ymemtype), NULL);
348   ierr = VecRestoreArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x);
349   CHKERRQ(ierr);
350   ierr = VecRestoreArrayAndMemType(user->Yloc, &y); CHKERRQ(ierr);
351 
352   // Local-to-global
353   ierr = VecZeroEntries(Y); CHKERRQ(ierr);
354   ierr = VecScatterBegin(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD);
355   CHKERRQ(ierr);
356   ierr = VecScatterEnd(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD);
357   CHKERRQ(ierr);
358   ierr = VecScatterBegin(user->ltog0, user->Yloc, Y, ADD_VALUES,
359                          SCATTER_FORWARD); CHKERRQ(ierr);
360   ierr = VecScatterEnd(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD);
361   CHKERRQ(ierr);
362 
363   PetscFunctionReturn(0);
364 }
365 
366 // This function calculates the error in the final solution
367 static PetscErrorCode ComputeErrorMax(User user, CeedOperator operror, Vec X,
368                                       CeedVector target, PetscReal *maxerror) {
369   PetscErrorCode ierr;
370   PetscScalar *x;
371   PetscMemType memtype;
372   CeedVector collocated_error;
373   CeedInt length;
374 
375   PetscFunctionBeginUser;
376 
377   CeedVectorGetLength(target, &length);
378   CeedVectorCreate(user->ceed, length, &collocated_error);
379 
380   // Global-to-local
381   ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES,
382                          SCATTER_REVERSE); CHKERRQ(ierr);
383   ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES,
384                        SCATTER_REVERSE); CHKERRQ(ierr);
385 
386   // Setup libCEED vector
387   ierr = VecGetArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x,
388                                    &memtype); CHKERRQ(ierr);
389   CeedVectorSetArray(user->xceed, MemTypeP2C(memtype), CEED_USE_POINTER, x);
390 
391   // Apply libCEED operator
392   CeedOperatorApply(operror, user->xceed, collocated_error,
393                     CEED_REQUEST_IMMEDIATE);
394 
395   // Restore PETSc vector
396   CeedVectorTakeArray(user->xceed, MemTypeP2C(memtype), NULL);
397   ierr = VecRestoreArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x);
398   CHKERRQ(ierr);
399 
400   // Reduce max error
401   *maxerror = 0;
402   const CeedScalar *e;
403   CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e);
404   for (CeedInt i=0; i<length; i++) {
405     *maxerror = PetscMax(*maxerror, PetscAbsScalar(e[i]));
406   }
407   CeedVectorRestoreArrayRead(collocated_error, &e);
408   ierr = MPI_Allreduce(MPI_IN_PLACE, maxerror, 1, MPIU_REAL, MPIU_MAX,
409                        user->comm); CHKERRQ(ierr);
410 
411   // Cleanup
412   CeedVectorDestroy(&collocated_error);
413 
414   PetscFunctionReturn(0);
415 }
416 
417 int main(int argc, char **argv) {
418   PetscInt ierr;
419   MPI_Comm comm;
420   char ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self";
421   double my_rt_start, my_rt, rt_min, rt_max;
422   PetscInt degree, qextra, localnodes, localelem, melem[3], mnodes[3], p[3],
423            irank[3], lnodes[3], lsize, ncompu = 1, ksp_max_it_clip[2];
424   PetscScalar *r;
425   PetscBool test_mode, benchmark_mode, write_solution;
426   PetscMPIInt size, rank;
427   PetscLogStage solvestage;
428   Vec X, Xloc, rhs, rhsloc;
429   Mat mat;
430   KSP ksp;
431   VecScatter ltog, ltog0, gtogD;
432   PetscMemType memtype;
433   User user;
434   Ceed ceed;
435   CeedBasis basisx, basisu;
436   CeedElemRestriction Erestrictx, Erestrictu, Erestrictui, Erestrictqdi;
437   CeedQFunction qfsetupgeo, qfsetuprhs, qfapply, qferror;
438   CeedOperator opsetupgeo, opsetuprhs, opapply, operror;
439   CeedVector xcoord, qdata, rhsceed, target;
440   CeedInt P, Q;
441   const CeedInt dim = 3, ncompx = 3;
442   bpType bpchoice;
443 
444   ierr = PetscInitialize(&argc, &argv, NULL, help);
445   if (ierr) return ierr;
446   comm = PETSC_COMM_WORLD;
447 
448   // Read command line options
449   ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr);
450   bpchoice = CEED_BP1;
451   ierr = PetscOptionsEnum("-problem",
452                           "CEED benchmark problem to solve", NULL,
453                           bpTypes, (PetscEnum)bpchoice, (PetscEnum *)&bpchoice,
454                           NULL); CHKERRQ(ierr);
455   ncompu = bpOptions[bpchoice].ncompu;
456   test_mode = PETSC_FALSE;
457   ierr = PetscOptionsBool("-test",
458                           "Testing mode (do not print unless error is large)",
459                           NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr);
460   benchmark_mode = PETSC_FALSE;
461   ierr = PetscOptionsBool("-benchmark",
462                           "Benchmarking mode (prints benchmark statistics)",
463                           NULL, benchmark_mode, &benchmark_mode, NULL);
464   CHKERRQ(ierr);
465   write_solution = PETSC_FALSE;
466   ierr = PetscOptionsBool("-write_solution",
467                           "Write solution for visualization",
468                           NULL, write_solution, &write_solution, NULL);
469   CHKERRQ(ierr);
470   degree = test_mode ? 3 : 1;
471   ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis",
472                          NULL, degree, &degree, NULL); CHKERRQ(ierr);
473   qextra = bpOptions[bpchoice].qextra;
474   ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points",
475                          NULL, qextra, &qextra, NULL); CHKERRQ(ierr);
476   ierr = PetscOptionsString("-ceed", "CEED resource specifier",
477                             NULL, ceedresource, ceedresource,
478                             sizeof(ceedresource), NULL); CHKERRQ(ierr);
479   localnodes = 1000;
480   ierr = PetscOptionsInt("-local",
481                          "Target number of locally owned nodes per process",
482                          NULL, localnodes, &localnodes, NULL); CHKERRQ(ierr);
483   PetscInt two = 2;
484   ksp_max_it_clip[0] = 5;
485   ksp_max_it_clip[1] = 20;
486   ierr = PetscOptionsIntArray("-ksp_max_it_clip",
487                               "Min and max number of iterations to use during benchmarking",
488                               NULL, ksp_max_it_clip, &two, NULL);
489   CHKERRQ(ierr);
490   ierr = PetscOptionsEnd(); CHKERRQ(ierr);
491   P = degree + 1;
492   Q = P + qextra;
493 
494   // Set up libCEED
495   CeedInit(ceedresource, &ceed);
496   CeedMemType memtypebackend;
497   CeedGetPreferredMemType(ceed, &memtypebackend);
498 
499   VecType default_vectype = NULL, vectype;
500   switch (memtypebackend) {
501   case CEED_MEM_HOST: default_vectype = VECSTANDARD; break;
502   case CEED_MEM_DEVICE: {
503     const char *resolved;
504     CeedGetResource(ceed, &resolved);
505     if (strstr(resolved, "/gpu/cuda")) default_vectype = VECCUDA;
506     else if (strstr(resolved, "/gpu/hip/occa"))
507       default_vectype = VECSTANDARD; // https://github.com/CEED/libCEED/issues/678
508     else if (strstr(resolved, "/gpu/hip")) default_vectype = VECHIP;
509     else default_vectype = VECSTANDARD;
510   }
511   }
512 
513   // Determine size of process grid
514   ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr);
515   Split3(size, p, false);
516 
517   // Find a nicely composite number of elements no less than localnodes
518   for (localelem = PetscMax(1, localnodes / (degree*degree*degree)); ;
519        localelem++) {
520     Split3(localelem, melem, true);
521     if (Max3(melem) / Min3(melem) <= 2) break;
522   }
523 
524   // Find my location in the process grid
525   ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr);
526   for (int d=0,rankleft=rank; d<dim; d++) {
527     const int pstride[3] = {p[1] *p[2], p[2], 1};
528     irank[d] = rankleft / pstride[d];
529     rankleft -= irank[d] * pstride[d];
530   }
531 
532   GlobalNodes(p, irank, degree, melem, mnodes);
533 
534   // Setup global vector
535   ierr = VecCreate(comm, &X); CHKERRQ(ierr);
536   ierr = VecSetType(X, default_vectype); CHKERRQ(ierr);
537   ierr = VecSetSizes(X, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, PETSC_DECIDE);
538   CHKERRQ(ierr);
539   ierr = VecSetFromOptions(X); CHKERRQ(ierr);
540   ierr = VecSetUp(X); CHKERRQ(ierr);
541 
542   // Set up libCEED
543   CeedInit(ceedresource, &ceed);
544 
545   // Print summary
546   CeedInt gsize;
547   ierr = VecGetSize(X, &gsize); CHKERRQ(ierr);
548   if (!test_mode) {
549     const char *usedresource;
550     CeedGetResource(ceed, &usedresource);
551 
552     ierr = VecGetType(X, &vectype); CHKERRQ(ierr);
553 
554     ierr = PetscPrintf(comm,
555                        "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n"
556                        "  PETSc:\n"
557                        "    PETSc Vec Type                     : %s\n"
558                        "  libCEED:\n"
559                        "    libCEED Backend                    : %s\n"
560                        "    libCEED Backend MemType            : %s\n"
561                        "  Mesh:\n"
562                        "    Number of 1D Basis Nodes (P)       : %d\n"
563                        "    Number of 1D Quadrature Points (Q) : %d\n"
564                        "    Global nodes                       : %D\n"
565                        "    Process Decomposition              : %D %D %D\n"
566                        "    Local Elements                     : %D = %D %D %D\n"
567                        "    Owned nodes                        : %D = %D %D %D\n"
568                        "    DoF per node                       : %D\n",
569                        bpchoice+1, vectype, usedresource,
570                        CeedMemTypes[memtypebackend],
571                        P, Q,  gsize/ncompu, p[0], p[1], p[2], localelem,
572                        melem[0], melem[1], melem[2],
573                        mnodes[0]*mnodes[1]*mnodes[2], mnodes[0], mnodes[1],
574                        mnodes[2], ncompu); CHKERRQ(ierr);
575   }
576 
577   {
578     lsize = 1;
579     for (int d=0; d<dim; d++) {
580       lnodes[d] = melem[d]*degree + 1;
581       lsize *= lnodes[d];
582     }
583     ierr = VecCreate(PETSC_COMM_SELF, &Xloc); CHKERRQ(ierr);
584     ierr = VecSetType(Xloc, default_vectype); CHKERRQ(ierr);
585     ierr = VecSetSizes(Xloc, lsize*ncompu, PETSC_DECIDE); CHKERRQ(ierr);
586     ierr = VecSetFromOptions(Xloc); CHKERRQ(ierr);
587     ierr = VecSetUp(Xloc); CHKERRQ(ierr);
588 
589     // Create local-to-global scatter
590     PetscInt *ltogind, *ltogind0, *locind, l0count;
591     IS ltogis, ltogis0, locis;
592     PetscInt gstart[2][2][2], gmnodes[2][2][2][dim];
593 
594     for (int i=0; i<2; i++) {
595       for (int j=0; j<2; j++) {
596         for (int k=0; k<2; k++) {
597           PetscInt ijkrank[3] = {irank[0]+i, irank[1]+j, irank[2]+k};
598           gstart[i][j][k] = GlobalStart(p, ijkrank, degree, melem);
599           GlobalNodes(p, ijkrank, degree, melem, gmnodes[i][j][k]);
600         }
601       }
602     }
603 
604     ierr = PetscMalloc1(lsize, &ltogind); CHKERRQ(ierr);
605     ierr = PetscMalloc1(lsize, &ltogind0); CHKERRQ(ierr);
606     ierr = PetscMalloc1(lsize, &locind); CHKERRQ(ierr);
607     l0count = 0;
608     for (PetscInt i=0,ir,ii; ir=i>=mnodes[0], ii=i-ir*mnodes[0], i<lnodes[0]; i++)
609       for (PetscInt j=0,jr,jj; jr=j>=mnodes[1], jj=j-jr*mnodes[1], j<lnodes[1]; j++)
610         for (PetscInt k=0,kr,kk; kr=k>=mnodes[2], kk=k-kr*mnodes[2], k<lnodes[2]; k++) {
611           PetscInt here = (i*lnodes[1]+j)*lnodes[2]+k;
612           ltogind[here] =
613             gstart[ir][jr][kr] + (ii*gmnodes[ir][jr][kr][1]+jj)*gmnodes[ir][jr][kr][2]+kk;
614           if ((irank[0] == 0 && i == 0)
615               || (irank[1] == 0 && j == 0)
616               || (irank[2] == 0 && k == 0)
617               || (irank[0]+1 == p[0] && i+1 == lnodes[0])
618               || (irank[1]+1 == p[1] && j+1 == lnodes[1])
619               || (irank[2]+1 == p[2] && k+1 == lnodes[2]))
620             continue;
621           ltogind0[l0count] = ltogind[here];
622           locind[l0count++] = here;
623         }
624     ierr = ISCreateBlock(comm, ncompu, lsize, ltogind, PETSC_OWN_POINTER,
625                          &ltogis); CHKERRQ(ierr);
626     ierr = VecScatterCreate(Xloc, NULL, X, ltogis, &ltog); CHKERRQ(ierr);
627     CHKERRQ(ierr);
628     ierr = ISCreateBlock(comm, ncompu, l0count, ltogind0, PETSC_OWN_POINTER,
629                          &ltogis0); CHKERRQ(ierr);
630     ierr = ISCreateBlock(comm, ncompu, l0count, locind, PETSC_OWN_POINTER,
631                          &locis); CHKERRQ(ierr);
632     ierr = VecScatterCreate(Xloc, locis, X, ltogis0, &ltog0); CHKERRQ(ierr);
633     {
634       // Create global-to-global scatter for Dirichlet values (everything not in
635       // ltogis0, which is the range of ltog0)
636       PetscInt xstart, xend, *indD, countD = 0;
637       IS isD;
638       const PetscScalar *x;
639       ierr = VecZeroEntries(Xloc); CHKERRQ(ierr);
640       ierr = VecSet(X, 1.0); CHKERRQ(ierr);
641       ierr = VecScatterBegin(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD);
642       CHKERRQ(ierr);
643       ierr = VecScatterEnd(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD);
644       CHKERRQ(ierr);
645       ierr = VecGetOwnershipRange(X, &xstart, &xend); CHKERRQ(ierr);
646       ierr = PetscMalloc1(xend-xstart, &indD); CHKERRQ(ierr);
647       ierr = VecGetArrayRead(X, &x); CHKERRQ(ierr);
648       for (PetscInt i=0; i<xend-xstart; i++) {
649         if (x[i] == 1.) indD[countD++] = xstart + i;
650       }
651       ierr = VecRestoreArrayRead(X, &x); CHKERRQ(ierr);
652       ierr = ISCreateGeneral(comm, countD, indD, PETSC_COPY_VALUES, &isD);
653       CHKERRQ(ierr);
654       ierr = PetscFree(indD); CHKERRQ(ierr);
655       ierr = VecScatterCreate(X, isD, X, isD, &gtogD); CHKERRQ(ierr);
656       ierr = ISDestroy(&isD); CHKERRQ(ierr);
657     }
658     ierr = ISDestroy(&ltogis); CHKERRQ(ierr);
659     ierr = ISDestroy(&ltogis0); CHKERRQ(ierr);
660     ierr = ISDestroy(&locis); CHKERRQ(ierr);
661   }
662 
663   // CEED bases
664   CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompu, P, Q,
665                                   bpOptions[bpchoice].qmode, &basisu);
666   CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompx, 2, Q,
667                                   bpOptions[bpchoice].qmode, &basisx);
668 
669   // CEED restrictions
670   CreateRestriction(ceed, melem, P, ncompu, &Erestrictu);
671   CreateRestriction(ceed, melem, 2, dim, &Erestrictx);
672   CeedInt nelem = melem[0]*melem[1]*melem[2];
673   CeedElemRestrictionCreateStrided(ceed, nelem, Q*Q*Q, ncompu,
674                                    ncompu*nelem*Q*Q*Q,
675                                    CEED_STRIDES_BACKEND, &Erestrictui);
676   CeedElemRestrictionCreateStrided(ceed, nelem, Q*Q*Q,
677                                    bpOptions[bpchoice].qdatasize,
678                                    bpOptions[bpchoice].qdatasize*nelem*Q*Q*Q,
679                                    CEED_STRIDES_BACKEND, &Erestrictqdi);
680   {
681     CeedScalar *xloc;
682     CeedInt shape[3] = {melem[0]+1, melem[1]+1, melem[2]+1}, len =
683                          shape[0]*shape[1]*shape[2];
684     xloc = malloc(len*ncompx*sizeof xloc[0]);
685     for (CeedInt i=0; i<shape[0]; i++) {
686       for (CeedInt j=0; j<shape[1]; j++) {
687         for (CeedInt k=0; k<shape[2]; k++) {
688           xloc[dim*((i*shape[1]+j)*shape[2]+k) + 0] = 1.*(irank[0]*melem[0]+i) /
689               (p[0]*melem[0]);
690           xloc[dim*((i*shape[1]+j)*shape[2]+k) + 1] = 1.*(irank[1]*melem[1]+j) /
691               (p[1]*melem[1]);
692           xloc[dim*((i*shape[1]+j)*shape[2]+k) + 2] = 1.*(irank[2]*melem[2]+k) /
693               (p[2]*melem[2]);
694         }
695       }
696     }
697     CeedVectorCreate(ceed, len*ncompx, &xcoord);
698     CeedVectorSetArray(xcoord, CEED_MEM_HOST, CEED_OWN_POINTER, xloc);
699   }
700 
701   // Create the Qfunction that builds the operator quadrature data
702   CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].setupgeo,
703                               bpOptions[bpchoice].setupgeofname, &qfsetupgeo);
704   CeedQFunctionAddInput(qfsetupgeo, "dx", ncompx*dim, CEED_EVAL_GRAD);
705   CeedQFunctionAddInput(qfsetupgeo, "weight", 1, CEED_EVAL_WEIGHT);
706   CeedQFunctionAddOutput(qfsetupgeo, "qdata", bpOptions[bpchoice].qdatasize,
707                          CEED_EVAL_NONE);
708 
709   // Create the Qfunction that sets up the RHS and true solution
710   CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].setuprhs,
711                               bpOptions[bpchoice].setuprhsfname, &qfsetuprhs);
712   CeedQFunctionAddInput(qfsetuprhs, "x", ncompx, CEED_EVAL_INTERP);
713   CeedQFunctionAddInput(qfsetuprhs, "dx", ncompx*dim, CEED_EVAL_GRAD);
714   CeedQFunctionAddInput(qfsetuprhs, "weight", 1, CEED_EVAL_WEIGHT);
715   CeedQFunctionAddOutput(qfsetuprhs, "true_soln", ncompu, CEED_EVAL_NONE);
716   CeedQFunctionAddOutput(qfsetuprhs, "rhs", ncompu, CEED_EVAL_INTERP);
717 
718   // Set up PDE operator
719   CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].apply,
720                               bpOptions[bpchoice].applyfname, &qfapply);
721   // Add inputs and outputs
722   CeedInt inscale = bpOptions[bpchoice].inmode==CEED_EVAL_GRAD ? 3 : 1;
723   CeedInt outscale = bpOptions[bpchoice].outmode==CEED_EVAL_GRAD ? 3 : 1;
724   CeedQFunctionAddInput(qfapply, "u", ncompu*inscale,
725                         bpOptions[bpchoice].inmode);
726   CeedQFunctionAddInput(qfapply, "qdata", bpOptions[bpchoice].qdatasize,
727                         CEED_EVAL_NONE);
728   CeedQFunctionAddOutput(qfapply, "v", ncompu*outscale,
729                          bpOptions[bpchoice].outmode);
730 
731   // Create the error qfunction
732   CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].error,
733                               bpOptions[bpchoice].errorfname, &qferror);
734   CeedQFunctionAddInput(qferror, "u", ncompu, CEED_EVAL_INTERP);
735   CeedQFunctionAddInput(qferror, "true_soln", ncompu, CEED_EVAL_NONE);
736   CeedQFunctionAddOutput(qferror, "error", ncompu, CEED_EVAL_NONE);
737 
738   // Create the persistent vectors that will be needed in setup
739   CeedInt nqpts;
740   CeedBasisGetNumQuadraturePoints(basisu, &nqpts);
741   CeedVectorCreate(ceed, bpOptions[bpchoice].qdatasize*nelem*nqpts, &qdata);
742   CeedVectorCreate(ceed, nelem*nqpts*ncompu, &target);
743   CeedVectorCreate(ceed, lsize*ncompu, &rhsceed);
744 
745   // Create the operator that builds the quadrature data for the ceed operator
746   CeedOperatorCreate(ceed, qfsetupgeo, CEED_QFUNCTION_NONE,
747                      CEED_QFUNCTION_NONE, &opsetupgeo);
748   CeedOperatorSetField(opsetupgeo, "dx", Erestrictx, basisx,
749                        CEED_VECTOR_ACTIVE);
750   CeedOperatorSetField(opsetupgeo, "weight", CEED_ELEMRESTRICTION_NONE, basisx,
751                        CEED_VECTOR_NONE);
752   CeedOperatorSetField(opsetupgeo, "qdata", Erestrictqdi,
753                        CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
754 
755   // Create the operator that builds the RHS and true solution
756   CeedOperatorCreate(ceed, qfsetuprhs, CEED_QFUNCTION_NONE,
757                      CEED_QFUNCTION_NONE, &opsetuprhs);
758   CeedOperatorSetField(opsetuprhs, "x", Erestrictx, basisx,
759                        CEED_VECTOR_ACTIVE);
760   CeedOperatorSetField(opsetuprhs, "dx", Erestrictx, basisx,
761                        CEED_VECTOR_ACTIVE);
762   CeedOperatorSetField(opsetuprhs, "weight", CEED_ELEMRESTRICTION_NONE, basisx,
763                        CEED_VECTOR_NONE);
764   CeedOperatorSetField(opsetuprhs, "true_soln", Erestrictui,
765                        CEED_BASIS_COLLOCATED, target);
766   CeedOperatorSetField(opsetuprhs, "rhs", Erestrictu, basisu,
767                        CEED_VECTOR_ACTIVE);
768 
769   // Create the mass or diff operator
770   CeedOperatorCreate(ceed, qfapply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE,
771                      &opapply);
772   CeedOperatorSetField(opapply, "u", Erestrictu, basisu, CEED_VECTOR_ACTIVE);
773   CeedOperatorSetField(opapply, "qdata", Erestrictqdi, CEED_BASIS_COLLOCATED,
774                        qdata);
775   CeedOperatorSetField(opapply, "v", Erestrictu, basisu, CEED_VECTOR_ACTIVE);
776 
777   // Create the error operator
778   CeedOperatorCreate(ceed, qferror, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE,
779                      &operror);
780   CeedOperatorSetField(operror, "u", Erestrictu, basisu, CEED_VECTOR_ACTIVE);
781   CeedOperatorSetField(operror, "true_soln", Erestrictui,
782                        CEED_BASIS_COLLOCATED, target);
783   CeedOperatorSetField(operror, "error", Erestrictui, CEED_BASIS_COLLOCATED,
784                        CEED_VECTOR_ACTIVE);
785 
786   // Set up Mat
787   ierr = PetscMalloc1(1, &user); CHKERRQ(ierr);
788   user->comm = comm;
789   user->ltog = ltog;
790   if (bpchoice != CEED_BP1 && bpchoice != CEED_BP2) {
791     user->ltog0 = ltog0;
792     user->gtogD = gtogD;
793   }
794   user->Xloc = Xloc;
795   ierr = VecDuplicate(Xloc, &user->Yloc); CHKERRQ(ierr);
796   CeedVectorCreate(ceed, lsize*ncompu, &user->xceed);
797   CeedVectorCreate(ceed, lsize*ncompu, &user->yceed);
798   user->op = opapply;
799   user->qdata = qdata;
800   user->ceed = ceed;
801 
802   ierr = MatCreateShell(comm, mnodes[0]*mnodes[1]*mnodes[2]*ncompu,
803                         mnodes[0]*mnodes[1]*mnodes[2]*ncompu,
804                         PETSC_DECIDE, PETSC_DECIDE, user, &mat); CHKERRQ(ierr);
805   if (bpchoice == CEED_BP1 || bpchoice == CEED_BP2) {
806     ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Mass);
807     CHKERRQ(ierr);
808   } else {
809     ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Diff);
810     CHKERRQ(ierr);
811   }
812   ierr = VecGetType(X, &vectype); CHKERRQ(ierr);
813   ierr = MatShellSetVecType(mat, vectype); CHKERRQ(ierr);
814 
815   // Get RHS vector
816   ierr = VecDuplicate(X, &rhs); CHKERRQ(ierr);
817   ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr);
818   ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr);
819   ierr = VecGetArrayAndMemType(rhsloc, &r, &memtype); CHKERRQ(ierr);
820   CeedVectorSetArray(rhsceed, MemTypeP2C(memtype), CEED_USE_POINTER, r);
821 
822   // Setup qdata, rhs, and target
823   CeedOperatorApply(opsetupgeo, xcoord, qdata, CEED_REQUEST_IMMEDIATE);
824   CeedOperatorApply(opsetuprhs, xcoord, rhsceed, CEED_REQUEST_IMMEDIATE);
825   CeedVectorDestroy(&xcoord);
826 
827   // Gather RHS
828   ierr = CeedVectorTakeArray(rhsceed, MemTypeP2C(memtype), NULL); CHKERRQ(ierr);
829   ierr = VecRestoreArrayAndMemType(rhsloc, &r); CHKERRQ(ierr);
830   ierr = VecZeroEntries(rhs); CHKERRQ(ierr);
831   ierr = VecScatterBegin(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD);
832   CHKERRQ(ierr);
833   ierr = VecScatterEnd(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD);
834   CHKERRQ(ierr);
835   CeedVectorDestroy(&rhsceed);
836 
837   ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr);
838   {
839     PC pc;
840     ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr);
841     if (bpchoice == CEED_BP1 || bpchoice == CEED_BP2) {
842       ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr);
843       ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr);
844     } else {
845       ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr);
846     }
847     ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr);
848     ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr);
849     ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT,
850                             PETSC_DEFAULT); CHKERRQ(ierr);
851   }
852   ierr = KSPSetOperators(ksp, mat, mat); CHKERRQ(ierr);
853   // First run's performance log is not considered for benchmarking purposes
854   ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1);
855   CHKERRQ(ierr);
856   my_rt_start = MPI_Wtime();
857   ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr);
858   my_rt = MPI_Wtime() - my_rt_start;
859   ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm);
860   CHKERRQ(ierr);
861   // Set maxits based on first iteration timing
862   if (my_rt > 0.02) {
863     ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT,
864                             ksp_max_it_clip[0]); CHKERRQ(ierr);
865   } else {
866     ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT,
867                             ksp_max_it_clip[1]); CHKERRQ(ierr);
868   }
869   ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr);
870 
871   // Timed solve
872   ierr = VecZeroEntries(X); CHKERRQ(ierr);
873   ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr);
874 
875   // -- Performance logging
876   ierr = PetscLogStageRegister("Solve Stage", &solvestage); CHKERRQ(ierr);
877   ierr = PetscLogStagePush(solvestage); CHKERRQ(ierr);
878 
879   // -- Solve
880   my_rt_start = MPI_Wtime();
881   ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr);
882   my_rt = MPI_Wtime() - my_rt_start;
883 
884   // -- Performance logging
885   ierr = PetscLogStagePop();
886 
887   // Output results
888   {
889     KSPType ksptype;
890     KSPConvergedReason reason;
891     PetscReal rnorm;
892     PetscInt its;
893     ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr);
894     ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr);
895     ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
896     ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr);
897     if (!test_mode || reason < 0 || rnorm > 1e-9) {
898       ierr = PetscPrintf(comm,
899                          "  KSP:\n"
900                          "    KSP Type                           : %s\n"
901                          "    KSP Convergence                    : %s\n"
902                          "    Total KSP Iterations               : %D\n"
903                          "    Final rnorm                        : %e\n",
904                          ksptype, KSPConvergedReasons[reason], its,
905                          (double)rnorm); CHKERRQ(ierr);
906     }
907     if (!test_mode) {
908       ierr = PetscPrintf(comm,"  Performance:\n"); CHKERRQ(ierr);
909     }
910     {
911       PetscReal maxerror;
912       ierr = ComputeErrorMax(user, operror, X, target, &maxerror);
913       CHKERRQ(ierr);
914       PetscReal tol = 5e-2;
915       if (!test_mode || maxerror > tol) {
916         ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm);
917         CHKERRQ(ierr);
918         ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm);
919         CHKERRQ(ierr);
920         ierr = PetscPrintf(comm,
921                            "    Pointwise Error (max)              : %e\n"
922                            "    CG Solve Time                      : %g (%g) sec\n",
923                            (double)maxerror, rt_max, rt_min); CHKERRQ(ierr);
924       }
925     }
926     if (!test_mode) {
927       ierr = PetscPrintf(comm,
928                          "    DoFs/Sec in CG                     : %g (%g) million\n",
929                          1e-6*gsize*its/rt_max,
930                          1e-6*gsize*its/rt_min); CHKERRQ(ierr);
931     }
932   }
933 
934   if (write_solution) {
935     PetscViewer vtkviewersoln;
936 
937     ierr = PetscViewerCreate(comm, &vtkviewersoln); CHKERRQ(ierr);
938     ierr = PetscViewerSetType(vtkviewersoln, PETSCVIEWERVTK); CHKERRQ(ierr);
939     ierr = PetscViewerFileSetName(vtkviewersoln, "solution.vtk"); CHKERRQ(ierr);
940     ierr = VecView(X, vtkviewersoln); CHKERRQ(ierr);
941     ierr = PetscViewerDestroy(&vtkviewersoln); CHKERRQ(ierr);
942   }
943 
944   ierr = VecDestroy(&rhs); CHKERRQ(ierr);
945   ierr = VecDestroy(&rhsloc); CHKERRQ(ierr);
946   ierr = VecDestroy(&X); CHKERRQ(ierr);
947   ierr = VecDestroy(&user->Xloc); CHKERRQ(ierr);
948   ierr = VecDestroy(&user->Yloc); CHKERRQ(ierr);
949   ierr = VecScatterDestroy(&ltog); CHKERRQ(ierr);
950   ierr = VecScatterDestroy(&ltog0); CHKERRQ(ierr);
951   ierr = VecScatterDestroy(&gtogD); CHKERRQ(ierr);
952   ierr = MatDestroy(&mat); CHKERRQ(ierr);
953   ierr = KSPDestroy(&ksp); CHKERRQ(ierr);
954 
955   CeedVectorDestroy(&user->xceed);
956   CeedVectorDestroy(&user->yceed);
957   CeedVectorDestroy(&user->qdata);
958   CeedVectorDestroy(&target);
959   CeedOperatorDestroy(&opsetupgeo);
960   CeedOperatorDestroy(&opsetuprhs);
961   CeedOperatorDestroy(&opapply);
962   CeedOperatorDestroy(&operror);
963   CeedElemRestrictionDestroy(&Erestrictu);
964   CeedElemRestrictionDestroy(&Erestrictx);
965   CeedElemRestrictionDestroy(&Erestrictui);
966   CeedElemRestrictionDestroy(&Erestrictqdi);
967   CeedQFunctionDestroy(&qfsetupgeo);
968   CeedQFunctionDestroy(&qfsetuprhs);
969   CeedQFunctionDestroy(&qfapply);
970   CeedQFunctionDestroy(&qferror);
971   CeedBasisDestroy(&basisu);
972   CeedBasisDestroy(&basisx);
973   CeedDestroy(&ceed);
974   ierr = PetscFree(user); CHKERRQ(ierr);
975   return PetscFinalize();
976 }
977