1*cb32e2e7SValeria Barra // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2*cb32e2e7SValeria Barra // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3*cb32e2e7SValeria Barra // reserved. See files LICENSE and NOTICE for details. 4*cb32e2e7SValeria Barra // 5*cb32e2e7SValeria Barra // This file is part of CEED, a collection of benchmarks, miniapps, software 6*cb32e2e7SValeria Barra // libraries and APIs for efficient high-order finite element and spectral 7*cb32e2e7SValeria Barra // element discretizations for exascale applications. For more information and 8*cb32e2e7SValeria Barra // source code availability see http://github.com/ceed. 9*cb32e2e7SValeria Barra // 10*cb32e2e7SValeria Barra // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11*cb32e2e7SValeria Barra // a collaborative effort of two U.S. Department of Energy organizations (Office 12*cb32e2e7SValeria Barra // of Science and the National Nuclear Security Administration) responsible for 13*cb32e2e7SValeria Barra // the planning and preparation of a capable exascale ecosystem, including 14*cb32e2e7SValeria Barra // software, applications, hardware, advanced system engineering and early 15*cb32e2e7SValeria Barra // testbed platforms, in support of the nation's exascale computing imperative. 16*cb32e2e7SValeria Barra 17*cb32e2e7SValeria Barra // libCEED + PETSc Example: CEED BPs 18*cb32e2e7SValeria Barra // 19*cb32e2e7SValeria Barra // This example demonstrates a simple usage of libCEED with PETSc to solve the 20*cb32e2e7SValeria Barra // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. 21*cb32e2e7SValeria Barra // 22*cb32e2e7SValeria Barra // The code is intentionally "raw", using only low-level communication 23*cb32e2e7SValeria Barra // primitives. 24*cb32e2e7SValeria Barra // 25*cb32e2e7SValeria Barra // Build with: 26*cb32e2e7SValeria Barra // 27*cb32e2e7SValeria Barra // make bpsraw [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 28*cb32e2e7SValeria Barra // 29*cb32e2e7SValeria Barra // Sample runs: 30*cb32e2e7SValeria Barra // 31*cb32e2e7SValeria Barra // bpsraw -problem bp1 32*cb32e2e7SValeria Barra // bpsraw -problem bp2 -ceed /cpu/self 33*cb32e2e7SValeria Barra // bpsraw -problem bp3 -ceed /gpu/occa 34*cb32e2e7SValeria Barra // bpsraw -problem bp4 -ceed /cpu/occa 35*cb32e2e7SValeria Barra // bpsraw -problem bp5 -ceed /omp/occa 36*cb32e2e7SValeria Barra // bpsraw -problem bp6 -ceed /ocl/occa 37*cb32e2e7SValeria Barra // 38*cb32e2e7SValeria Barra //TESTARGS -ceed {ceed_resource} -test -problem bp2 -degree 5 -qextra 5 39*cb32e2e7SValeria Barra 40*cb32e2e7SValeria Barra /// @file 41*cb32e2e7SValeria Barra /// CEED BPs example using PETSc 42*cb32e2e7SValeria Barra /// See bps.c for an implementation using DMPlex unstructured grids. 43*cb32e2e7SValeria Barra const char help[] = "Solve CEED BPs using PETSc\n"; 44*cb32e2e7SValeria Barra 45*cb32e2e7SValeria Barra #include <stdbool.h> 46*cb32e2e7SValeria Barra #include <string.h> 47*cb32e2e7SValeria Barra #include <petscksp.h> 48*cb32e2e7SValeria Barra #include <ceed.h> 49*cb32e2e7SValeria Barra #include "qfunctions/bps/common.h" 50*cb32e2e7SValeria Barra #include "qfunctions/bps/bp1.h" 51*cb32e2e7SValeria Barra #include "qfunctions/bps/bp2.h" 52*cb32e2e7SValeria Barra #include "qfunctions/bps/bp3.h" 53*cb32e2e7SValeria Barra #include "qfunctions/bps/bp4.h" 54*cb32e2e7SValeria Barra 55*cb32e2e7SValeria Barra static void Split3(PetscInt size, PetscInt m[3], bool reverse) { 56*cb32e2e7SValeria Barra for (PetscInt d=0,sizeleft=size; d<3; d++) { 57*cb32e2e7SValeria Barra PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d))); 58*cb32e2e7SValeria Barra while (try * (sizeleft / try) != sizeleft) try++; 59*cb32e2e7SValeria Barra m[reverse ? 2-d : d] = try; 60*cb32e2e7SValeria Barra sizeleft /= try; 61*cb32e2e7SValeria Barra } 62*cb32e2e7SValeria Barra } 63*cb32e2e7SValeria Barra 64*cb32e2e7SValeria Barra static PetscInt Max3(const PetscInt a[3]) { 65*cb32e2e7SValeria Barra return PetscMax(a[0], PetscMax(a[1], a[2])); 66*cb32e2e7SValeria Barra } 67*cb32e2e7SValeria Barra static PetscInt Min3(const PetscInt a[3]) { 68*cb32e2e7SValeria Barra return PetscMin(a[0], PetscMin(a[1], a[2])); 69*cb32e2e7SValeria Barra } 70*cb32e2e7SValeria Barra static void GlobalNodes(const PetscInt p[3], const PetscInt irank[3], 71*cb32e2e7SValeria Barra PetscInt degree, const PetscInt melem[3], 72*cb32e2e7SValeria Barra PetscInt mnodes[3]) { 73*cb32e2e7SValeria Barra for (int d=0; d<3; d++) 74*cb32e2e7SValeria Barra mnodes[d] = degree*melem[d] + (irank[d] == p[d]-1); 75*cb32e2e7SValeria Barra } 76*cb32e2e7SValeria Barra static PetscInt GlobalStart(const PetscInt p[3], const PetscInt irank[3], 77*cb32e2e7SValeria Barra PetscInt degree, const PetscInt melem[3]) { 78*cb32e2e7SValeria Barra PetscInt start = 0; 79*cb32e2e7SValeria Barra // Dumb brute-force is easier to read 80*cb32e2e7SValeria Barra for (PetscInt i=0; i<p[0]; i++) { 81*cb32e2e7SValeria Barra for (PetscInt j=0; j<p[1]; j++) { 82*cb32e2e7SValeria Barra for (PetscInt k=0; k<p[2]; k++) { 83*cb32e2e7SValeria Barra PetscInt mnodes[3], ijkrank[] = {i,j,k}; 84*cb32e2e7SValeria Barra if (i == irank[0] && j == irank[1] && k == irank[2]) return start; 85*cb32e2e7SValeria Barra GlobalNodes(p, ijkrank, degree, melem, mnodes); 86*cb32e2e7SValeria Barra start += mnodes[0] * mnodes[1] * mnodes[2]; 87*cb32e2e7SValeria Barra } 88*cb32e2e7SValeria Barra } 89*cb32e2e7SValeria Barra } 90*cb32e2e7SValeria Barra return -1; 91*cb32e2e7SValeria Barra } 92*cb32e2e7SValeria Barra static int CreateRestriction(Ceed ceed, const CeedInt melem[3], 93*cb32e2e7SValeria Barra CeedInt P, CeedInt ncomp, 94*cb32e2e7SValeria Barra CeedElemRestriction *Erestrict) { 95*cb32e2e7SValeria Barra const PetscInt nelem = melem[0]*melem[1]*melem[2]; 96*cb32e2e7SValeria Barra PetscInt mnodes[3], *idx, *idxp; 97*cb32e2e7SValeria Barra 98*cb32e2e7SValeria Barra // Get indicies 99*cb32e2e7SValeria Barra for (int d=0; d<3; d++) mnodes[d] = melem[d]*(P-1) + 1; 100*cb32e2e7SValeria Barra idxp = idx = malloc(nelem*P*P*P*sizeof idx[0]); 101*cb32e2e7SValeria Barra for (CeedInt i=0; i<melem[0]; i++) 102*cb32e2e7SValeria Barra for (CeedInt j=0; j<melem[1]; j++) 103*cb32e2e7SValeria Barra for (CeedInt k=0; k<melem[2]; k++,idxp += P*P*P) 104*cb32e2e7SValeria Barra for (CeedInt ii=0; ii<P; ii++) 105*cb32e2e7SValeria Barra for (CeedInt jj=0; jj<P; jj++) 106*cb32e2e7SValeria Barra for (CeedInt kk=0; kk<P; kk++) { 107*cb32e2e7SValeria Barra if (0) { // This is the C-style (i,j,k) ordering that I prefer 108*cb32e2e7SValeria Barra idxp[(ii*P+jj)*P+kk] = (((i*(P-1)+ii)*mnodes[1] 109*cb32e2e7SValeria Barra + (j*(P-1)+jj))*mnodes[2] 110*cb32e2e7SValeria Barra + (k*(P-1)+kk)); 111*cb32e2e7SValeria Barra } else { // (k,j,i) ordering for consistency with MFEM example 112*cb32e2e7SValeria Barra idxp[ii+P*(jj+P*kk)] = (((i*(P-1)+ii)*mnodes[1] 113*cb32e2e7SValeria Barra + (j*(P-1)+jj))*mnodes[2] 114*cb32e2e7SValeria Barra + (k*(P-1)+kk)); 115*cb32e2e7SValeria Barra } 116*cb32e2e7SValeria Barra } 117*cb32e2e7SValeria Barra 118*cb32e2e7SValeria Barra // Setup CEED restriction 119*cb32e2e7SValeria Barra CeedElemRestrictionCreate(ceed, nelem, P*P*P, mnodes[0]*mnodes[1]*mnodes[2], 120*cb32e2e7SValeria Barra ncomp, CEED_MEM_HOST, CEED_OWN_POINTER, idx, 121*cb32e2e7SValeria Barra Erestrict); 122*cb32e2e7SValeria Barra 123*cb32e2e7SValeria Barra PetscFunctionReturn(0); 124*cb32e2e7SValeria Barra } 125*cb32e2e7SValeria Barra 126*cb32e2e7SValeria Barra // Data for PETSc 127*cb32e2e7SValeria Barra typedef struct User_ *User; 128*cb32e2e7SValeria Barra struct User_ { 129*cb32e2e7SValeria Barra MPI_Comm comm; 130*cb32e2e7SValeria Barra VecScatter ltog; // Scatter for all entries 131*cb32e2e7SValeria Barra VecScatter ltog0; // Skip Dirichlet values 132*cb32e2e7SValeria Barra VecScatter gtogD; // global-to-global; only Dirichlet values 133*cb32e2e7SValeria Barra Vec Xloc, Yloc; 134*cb32e2e7SValeria Barra CeedVector xceed, yceed; 135*cb32e2e7SValeria Barra CeedOperator op; 136*cb32e2e7SValeria Barra CeedVector qdata; 137*cb32e2e7SValeria Barra Ceed ceed; 138*cb32e2e7SValeria Barra }; 139*cb32e2e7SValeria Barra 140*cb32e2e7SValeria Barra // BP Options 141*cb32e2e7SValeria Barra typedef enum { 142*cb32e2e7SValeria Barra CEED_BP1 = 0, CEED_BP2 = 1, CEED_BP3 = 2, 143*cb32e2e7SValeria Barra CEED_BP4 = 3, CEED_BP5 = 4, CEED_BP6 = 5 144*cb32e2e7SValeria Barra } bpType; 145*cb32e2e7SValeria Barra static const char *const bpTypes[] = {"bp1","bp2","bp3","bp4","bp5","bp6", 146*cb32e2e7SValeria Barra "bpType","CEED_BP",0 147*cb32e2e7SValeria Barra }; 148*cb32e2e7SValeria Barra 149*cb32e2e7SValeria Barra // BP specific data 150*cb32e2e7SValeria Barra typedef struct { 151*cb32e2e7SValeria Barra CeedInt ncompu, qdatasize, qextra; 152*cb32e2e7SValeria Barra CeedQFunctionUser setupgeo, setuprhs, apply, error; 153*cb32e2e7SValeria Barra const char *setupgeofname, *setuprhsfname, *applyfname, *errorfname; 154*cb32e2e7SValeria Barra CeedEvalMode inmode, outmode; 155*cb32e2e7SValeria Barra CeedQuadMode qmode; 156*cb32e2e7SValeria Barra } bpData; 157*cb32e2e7SValeria Barra 158*cb32e2e7SValeria Barra bpData bpOptions[6] = { 159*cb32e2e7SValeria Barra [CEED_BP1] = { 160*cb32e2e7SValeria Barra .ncompu = 1, 161*cb32e2e7SValeria Barra .qdatasize = 1, 162*cb32e2e7SValeria Barra .qextra = 1, 163*cb32e2e7SValeria Barra .setupgeo = SetupMassGeo, 164*cb32e2e7SValeria Barra .setuprhs = SetupMassRhs, 165*cb32e2e7SValeria Barra .apply = Mass, 166*cb32e2e7SValeria Barra .error = Error, 167*cb32e2e7SValeria Barra .setupgeofname = SetupMassGeo_loc, 168*cb32e2e7SValeria Barra .setuprhsfname = SetupMassRhs_loc, 169*cb32e2e7SValeria Barra .applyfname = Mass_loc, 170*cb32e2e7SValeria Barra .errorfname = Error_loc, 171*cb32e2e7SValeria Barra .inmode = CEED_EVAL_INTERP, 172*cb32e2e7SValeria Barra .outmode = CEED_EVAL_INTERP, 173*cb32e2e7SValeria Barra .qmode = CEED_GAUSS 174*cb32e2e7SValeria Barra }, 175*cb32e2e7SValeria Barra [CEED_BP2] = { 176*cb32e2e7SValeria Barra .ncompu = 3, 177*cb32e2e7SValeria Barra .qdatasize = 1, 178*cb32e2e7SValeria Barra .qextra = 1, 179*cb32e2e7SValeria Barra .setupgeo = SetupMassGeo, 180*cb32e2e7SValeria Barra .setuprhs = SetupMassRhs3, 181*cb32e2e7SValeria Barra .apply = Mass3, 182*cb32e2e7SValeria Barra .error = Error3, 183*cb32e2e7SValeria Barra .setupgeofname = SetupMassGeo_loc, 184*cb32e2e7SValeria Barra .setuprhsfname = SetupMassRhs3_loc, 185*cb32e2e7SValeria Barra .applyfname = Mass3_loc, 186*cb32e2e7SValeria Barra .errorfname = Error3_loc, 187*cb32e2e7SValeria Barra .inmode = CEED_EVAL_INTERP, 188*cb32e2e7SValeria Barra .outmode = CEED_EVAL_INTERP, 189*cb32e2e7SValeria Barra .qmode = CEED_GAUSS 190*cb32e2e7SValeria Barra }, 191*cb32e2e7SValeria Barra [CEED_BP3] = { 192*cb32e2e7SValeria Barra .ncompu = 1, 193*cb32e2e7SValeria Barra .qdatasize = 6, 194*cb32e2e7SValeria Barra .qextra = 1, 195*cb32e2e7SValeria Barra .setupgeo = SetupDiffGeo, 196*cb32e2e7SValeria Barra .setuprhs = SetupDiffRhs, 197*cb32e2e7SValeria Barra .apply = Diff, 198*cb32e2e7SValeria Barra .error = Error, 199*cb32e2e7SValeria Barra .setupgeofname = SetupDiffGeo_loc, 200*cb32e2e7SValeria Barra .setuprhsfname = SetupDiffRhs_loc, 201*cb32e2e7SValeria Barra .applyfname = Diff_loc, 202*cb32e2e7SValeria Barra .errorfname = Error_loc, 203*cb32e2e7SValeria Barra .inmode = CEED_EVAL_GRAD, 204*cb32e2e7SValeria Barra .outmode = CEED_EVAL_GRAD, 205*cb32e2e7SValeria Barra .qmode = CEED_GAUSS 206*cb32e2e7SValeria Barra }, 207*cb32e2e7SValeria Barra [CEED_BP4] = { 208*cb32e2e7SValeria Barra .ncompu = 3, 209*cb32e2e7SValeria Barra .qdatasize = 6, 210*cb32e2e7SValeria Barra .qextra = 1, 211*cb32e2e7SValeria Barra .setupgeo = SetupDiffGeo, 212*cb32e2e7SValeria Barra .setuprhs = SetupDiffRhs3, 213*cb32e2e7SValeria Barra .apply = Diff3, 214*cb32e2e7SValeria Barra .error = Error3, 215*cb32e2e7SValeria Barra .setupgeofname = SetupDiffGeo_loc, 216*cb32e2e7SValeria Barra .setuprhsfname = SetupDiffRhs3_loc, 217*cb32e2e7SValeria Barra .applyfname = Diff_loc, 218*cb32e2e7SValeria Barra .errorfname = Error3_loc, 219*cb32e2e7SValeria Barra .inmode = CEED_EVAL_GRAD, 220*cb32e2e7SValeria Barra .outmode = CEED_EVAL_GRAD, 221*cb32e2e7SValeria Barra .qmode = CEED_GAUSS 222*cb32e2e7SValeria Barra }, 223*cb32e2e7SValeria Barra [CEED_BP5] = { 224*cb32e2e7SValeria Barra .ncompu = 1, 225*cb32e2e7SValeria Barra .qdatasize = 6, 226*cb32e2e7SValeria Barra .qextra = 0, 227*cb32e2e7SValeria Barra .setupgeo = SetupDiffGeo, 228*cb32e2e7SValeria Barra .setuprhs = SetupDiffRhs, 229*cb32e2e7SValeria Barra .apply = Diff, 230*cb32e2e7SValeria Barra .error = Error, 231*cb32e2e7SValeria Barra .setupgeofname = SetupDiffGeo_loc, 232*cb32e2e7SValeria Barra .setuprhsfname = SetupDiffRhs_loc, 233*cb32e2e7SValeria Barra .applyfname = Diff_loc, 234*cb32e2e7SValeria Barra .errorfname = Error_loc, 235*cb32e2e7SValeria Barra .inmode = CEED_EVAL_GRAD, 236*cb32e2e7SValeria Barra .outmode = CEED_EVAL_GRAD, 237*cb32e2e7SValeria Barra .qmode = CEED_GAUSS_LOBATTO 238*cb32e2e7SValeria Barra }, 239*cb32e2e7SValeria Barra [CEED_BP6] = { 240*cb32e2e7SValeria Barra .ncompu = 3, 241*cb32e2e7SValeria Barra .qdatasize = 6, 242*cb32e2e7SValeria Barra .qextra = 0, 243*cb32e2e7SValeria Barra .setupgeo = SetupDiffGeo, 244*cb32e2e7SValeria Barra .setuprhs = SetupDiffRhs3, 245*cb32e2e7SValeria Barra .apply = Diff3, 246*cb32e2e7SValeria Barra .error = Error3, 247*cb32e2e7SValeria Barra .setupgeofname = SetupDiffGeo_loc, 248*cb32e2e7SValeria Barra .setuprhsfname = SetupDiffRhs3_loc, 249*cb32e2e7SValeria Barra .applyfname = Diff_loc, 250*cb32e2e7SValeria Barra .errorfname = Error3_loc, 251*cb32e2e7SValeria Barra .inmode = CEED_EVAL_GRAD, 252*cb32e2e7SValeria Barra .outmode = CEED_EVAL_GRAD, 253*cb32e2e7SValeria Barra .qmode = CEED_GAUSS_LOBATTO 254*cb32e2e7SValeria Barra } 255*cb32e2e7SValeria Barra }; 256*cb32e2e7SValeria Barra 257*cb32e2e7SValeria Barra // This function uses libCEED to compute the action of the mass matrix 258*cb32e2e7SValeria Barra static PetscErrorCode MatMult_Mass(Mat A, Vec X, Vec Y) { 259*cb32e2e7SValeria Barra PetscErrorCode ierr; 260*cb32e2e7SValeria Barra User user; 261*cb32e2e7SValeria Barra PetscScalar *x, *y; 262*cb32e2e7SValeria Barra 263*cb32e2e7SValeria Barra PetscFunctionBeginUser; 264*cb32e2e7SValeria Barra ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 265*cb32e2e7SValeria Barra ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 266*cb32e2e7SValeria Barra SCATTER_REVERSE); CHKERRQ(ierr); 267*cb32e2e7SValeria Barra ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); 268*cb32e2e7SValeria Barra CHKERRQ(ierr); 269*cb32e2e7SValeria Barra ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); 270*cb32e2e7SValeria Barra 271*cb32e2e7SValeria Barra ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 272*cb32e2e7SValeria Barra ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr); 273*cb32e2e7SValeria Barra CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 274*cb32e2e7SValeria Barra CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y); 275*cb32e2e7SValeria Barra 276*cb32e2e7SValeria Barra CeedOperatorApply(user->op, user->xceed, user->yceed, 277*cb32e2e7SValeria Barra CEED_REQUEST_IMMEDIATE); 278*cb32e2e7SValeria Barra ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr); 279*cb32e2e7SValeria Barra 280*cb32e2e7SValeria Barra ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 281*cb32e2e7SValeria Barra ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); 282*cb32e2e7SValeria Barra 283*cb32e2e7SValeria Barra if (Y) { 284*cb32e2e7SValeria Barra ierr = VecZeroEntries(Y); CHKERRQ(ierr); 285*cb32e2e7SValeria Barra ierr = VecScatterBegin(user->ltog, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 286*cb32e2e7SValeria Barra CHKERRQ(ierr); 287*cb32e2e7SValeria Barra ierr = VecScatterEnd(user->ltog, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 288*cb32e2e7SValeria Barra CHKERRQ(ierr); 289*cb32e2e7SValeria Barra } 290*cb32e2e7SValeria Barra PetscFunctionReturn(0); 291*cb32e2e7SValeria Barra } 292*cb32e2e7SValeria Barra 293*cb32e2e7SValeria Barra // This function uses libCEED to compute the action of the Laplacian with 294*cb32e2e7SValeria Barra // Dirichlet boundary conditions 295*cb32e2e7SValeria Barra static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) { 296*cb32e2e7SValeria Barra PetscErrorCode ierr; 297*cb32e2e7SValeria Barra User user; 298*cb32e2e7SValeria Barra PetscScalar *x, *y; 299*cb32e2e7SValeria Barra 300*cb32e2e7SValeria Barra PetscFunctionBeginUser; 301*cb32e2e7SValeria Barra ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 302*cb32e2e7SValeria Barra 303*cb32e2e7SValeria Barra // Global-to-local 304*cb32e2e7SValeria Barra ierr = VecScatterBegin(user->ltog0, X, user->Xloc, INSERT_VALUES, 305*cb32e2e7SValeria Barra SCATTER_REVERSE); CHKERRQ(ierr); 306*cb32e2e7SValeria Barra ierr = VecScatterEnd(user->ltog0, X, user->Xloc, INSERT_VALUES, 307*cb32e2e7SValeria Barra SCATTER_REVERSE); 308*cb32e2e7SValeria Barra CHKERRQ(ierr); 309*cb32e2e7SValeria Barra ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); 310*cb32e2e7SValeria Barra 311*cb32e2e7SValeria Barra // Setup CEED vectors 312*cb32e2e7SValeria Barra ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 313*cb32e2e7SValeria Barra ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr); 314*cb32e2e7SValeria Barra CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 315*cb32e2e7SValeria Barra CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y); 316*cb32e2e7SValeria Barra 317*cb32e2e7SValeria Barra // Apply CEED operator 318*cb32e2e7SValeria Barra CeedOperatorApply(user->op, user->xceed, user->yceed, 319*cb32e2e7SValeria Barra CEED_REQUEST_IMMEDIATE); 320*cb32e2e7SValeria Barra ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr); 321*cb32e2e7SValeria Barra 322*cb32e2e7SValeria Barra // Restore PETSc vectors 323*cb32e2e7SValeria Barra ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 324*cb32e2e7SValeria Barra ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); 325*cb32e2e7SValeria Barra 326*cb32e2e7SValeria Barra // Local-to-global 327*cb32e2e7SValeria Barra ierr = VecZeroEntries(Y); CHKERRQ(ierr); 328*cb32e2e7SValeria Barra ierr = VecScatterBegin(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 329*cb32e2e7SValeria Barra CHKERRQ(ierr); 330*cb32e2e7SValeria Barra ierr = VecScatterEnd(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 331*cb32e2e7SValeria Barra CHKERRQ(ierr); 332*cb32e2e7SValeria Barra ierr = VecScatterBegin(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 333*cb32e2e7SValeria Barra CHKERRQ(ierr); 334*cb32e2e7SValeria Barra ierr = VecScatterEnd(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 335*cb32e2e7SValeria Barra CHKERRQ(ierr); 336*cb32e2e7SValeria Barra 337*cb32e2e7SValeria Barra PetscFunctionReturn(0); 338*cb32e2e7SValeria Barra } 339*cb32e2e7SValeria Barra 340*cb32e2e7SValeria Barra // This function calculates the error in the final solution 341*cb32e2e7SValeria Barra static PetscErrorCode ComputeErrorMax(User user, CeedOperator op_error, Vec X, 342*cb32e2e7SValeria Barra CeedVector target, PetscReal *maxerror) { 343*cb32e2e7SValeria Barra PetscErrorCode ierr; 344*cb32e2e7SValeria Barra PetscScalar *x; 345*cb32e2e7SValeria Barra CeedVector collocated_error; 346*cb32e2e7SValeria Barra CeedInt length; 347*cb32e2e7SValeria Barra 348*cb32e2e7SValeria Barra PetscFunctionBeginUser; 349*cb32e2e7SValeria Barra CeedVectorGetLength(target, &length); 350*cb32e2e7SValeria Barra CeedVectorCreate(user->ceed, length, &collocated_error); 351*cb32e2e7SValeria Barra 352*cb32e2e7SValeria Barra // Global-to-local 353*cb32e2e7SValeria Barra ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 354*cb32e2e7SValeria Barra SCATTER_REVERSE); CHKERRQ(ierr); 355*cb32e2e7SValeria Barra ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); 356*cb32e2e7SValeria Barra CHKERRQ(ierr); 357*cb32e2e7SValeria Barra 358*cb32e2e7SValeria Barra // Setup CEED vector 359*cb32e2e7SValeria Barra ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 360*cb32e2e7SValeria Barra CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 361*cb32e2e7SValeria Barra 362*cb32e2e7SValeria Barra // Apply CEED operator 363*cb32e2e7SValeria Barra CeedOperatorApply(op_error, user->xceed, collocated_error, 364*cb32e2e7SValeria Barra CEED_REQUEST_IMMEDIATE); 365*cb32e2e7SValeria Barra 366*cb32e2e7SValeria Barra // Restore PETSc vector 367*cb32e2e7SValeria Barra VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 368*cb32e2e7SValeria Barra 369*cb32e2e7SValeria Barra // Reduce max error 370*cb32e2e7SValeria Barra *maxerror = 0; 371*cb32e2e7SValeria Barra const CeedScalar *e; 372*cb32e2e7SValeria Barra CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e); 373*cb32e2e7SValeria Barra for (CeedInt i=0; i<length; i++) { 374*cb32e2e7SValeria Barra *maxerror = PetscMax(*maxerror, PetscAbsScalar(e[i])); 375*cb32e2e7SValeria Barra } 376*cb32e2e7SValeria Barra CeedVectorRestoreArrayRead(collocated_error, &e); 377*cb32e2e7SValeria Barra ierr = MPI_Allreduce(MPI_IN_PLACE, maxerror, 378*cb32e2e7SValeria Barra 1, MPIU_REAL, MPIU_MAX, user->comm); CHKERRQ(ierr); 379*cb32e2e7SValeria Barra 380*cb32e2e7SValeria Barra // Cleanup 381*cb32e2e7SValeria Barra CeedVectorDestroy(&collocated_error); 382*cb32e2e7SValeria Barra 383*cb32e2e7SValeria Barra PetscFunctionReturn(0); 384*cb32e2e7SValeria Barra } 385*cb32e2e7SValeria Barra 386*cb32e2e7SValeria Barra int main(int argc, char **argv) { 387*cb32e2e7SValeria Barra PetscInt ierr; 388*cb32e2e7SValeria Barra MPI_Comm comm; 389*cb32e2e7SValeria Barra char ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self"; 390*cb32e2e7SValeria Barra double my_rt_start, my_rt, rt_min, rt_max; 391*cb32e2e7SValeria Barra PetscInt degree, qextra, localnodes, localelem, melem[3], mnodes[3], p[3], 392*cb32e2e7SValeria Barra irank[3], lnodes[3], lsize, ncompu = 1; 393*cb32e2e7SValeria Barra PetscScalar *r; 394*cb32e2e7SValeria Barra PetscBool test_mode, benchmark_mode, write_solution; 395*cb32e2e7SValeria Barra PetscMPIInt size, rank; 396*cb32e2e7SValeria Barra Vec X, Xloc, rhs, rhsloc; 397*cb32e2e7SValeria Barra Mat mat; 398*cb32e2e7SValeria Barra KSP ksp; 399*cb32e2e7SValeria Barra VecScatter ltog, ltog0, gtogD; 400*cb32e2e7SValeria Barra User user; 401*cb32e2e7SValeria Barra Ceed ceed; 402*cb32e2e7SValeria Barra CeedBasis basisx, basisu; 403*cb32e2e7SValeria Barra CeedElemRestriction Erestrictx, Erestrictu, Erestrictxi, Erestrictui, 404*cb32e2e7SValeria Barra Erestrictqdi; 405*cb32e2e7SValeria Barra CeedQFunction qf_setupgeo, qf_setuprhs, qf_apply, qf_error; 406*cb32e2e7SValeria Barra CeedOperator op_setupgeo, op_setuprhs, op_apply, op_error; 407*cb32e2e7SValeria Barra CeedVector xcoord, qdata, rhsceed, target; 408*cb32e2e7SValeria Barra CeedInt P, Q; 409*cb32e2e7SValeria Barra const CeedInt dim = 3, ncompx = 3; 410*cb32e2e7SValeria Barra bpType bpChoice; 411*cb32e2e7SValeria Barra 412*cb32e2e7SValeria Barra ierr = PetscInitialize(&argc, &argv, NULL, help); 413*cb32e2e7SValeria Barra if (ierr) return ierr; 414*cb32e2e7SValeria Barra comm = PETSC_COMM_WORLD; 415*cb32e2e7SValeria Barra ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr); 416*cb32e2e7SValeria Barra bpChoice = CEED_BP1; 417*cb32e2e7SValeria Barra ierr = PetscOptionsEnum("-problem", 418*cb32e2e7SValeria Barra "CEED benchmark problem to solve", NULL, 419*cb32e2e7SValeria Barra bpTypes, (PetscEnum)bpChoice, (PetscEnum *)&bpChoice, 420*cb32e2e7SValeria Barra NULL); CHKERRQ(ierr); 421*cb32e2e7SValeria Barra ncompu = bpOptions[bpChoice].ncompu; 422*cb32e2e7SValeria Barra test_mode = PETSC_FALSE; 423*cb32e2e7SValeria Barra ierr = PetscOptionsBool("-test", 424*cb32e2e7SValeria Barra "Testing mode (do not print unless error is large)", 425*cb32e2e7SValeria Barra NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); 426*cb32e2e7SValeria Barra benchmark_mode = PETSC_FALSE; 427*cb32e2e7SValeria Barra ierr = PetscOptionsBool("-benchmark", 428*cb32e2e7SValeria Barra "Benchmarking mode (prints benchmark statistics)", 429*cb32e2e7SValeria Barra NULL, benchmark_mode, &benchmark_mode, NULL); 430*cb32e2e7SValeria Barra CHKERRQ(ierr); 431*cb32e2e7SValeria Barra write_solution = PETSC_FALSE; 432*cb32e2e7SValeria Barra ierr = PetscOptionsBool("-write_solution", 433*cb32e2e7SValeria Barra "Write solution for visualization", 434*cb32e2e7SValeria Barra NULL, write_solution, &write_solution, NULL); 435*cb32e2e7SValeria Barra CHKERRQ(ierr); 436*cb32e2e7SValeria Barra degree = test_mode ? 3 : 1; 437*cb32e2e7SValeria Barra ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", 438*cb32e2e7SValeria Barra NULL, degree, °ree, NULL); CHKERRQ(ierr); 439*cb32e2e7SValeria Barra qextra = bpOptions[bpChoice].qextra; 440*cb32e2e7SValeria Barra ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points", 441*cb32e2e7SValeria Barra NULL, qextra, &qextra, NULL); CHKERRQ(ierr); 442*cb32e2e7SValeria Barra ierr = PetscOptionsString("-ceed", "CEED resource specifier", 443*cb32e2e7SValeria Barra NULL, ceedresource, ceedresource, 444*cb32e2e7SValeria Barra sizeof(ceedresource), NULL); CHKERRQ(ierr); 445*cb32e2e7SValeria Barra localnodes = 1000; 446*cb32e2e7SValeria Barra ierr = PetscOptionsInt("-local", 447*cb32e2e7SValeria Barra "Target number of locally owned nodes per process", 448*cb32e2e7SValeria Barra NULL, localnodes, &localnodes, NULL); CHKERRQ(ierr); 449*cb32e2e7SValeria Barra ierr = PetscOptionsEnd(); CHKERRQ(ierr); 450*cb32e2e7SValeria Barra P = degree + 1; 451*cb32e2e7SValeria Barra Q = P + qextra; 452*cb32e2e7SValeria Barra 453*cb32e2e7SValeria Barra // Determine size of process grid 454*cb32e2e7SValeria Barra ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr); 455*cb32e2e7SValeria Barra Split3(size, p, false); 456*cb32e2e7SValeria Barra 457*cb32e2e7SValeria Barra // Find a nicely composite number of elements no less than localnodes 458*cb32e2e7SValeria Barra for (localelem = PetscMax(1, localnodes / (degree*degree*degree)); ; 459*cb32e2e7SValeria Barra localelem++) { 460*cb32e2e7SValeria Barra Split3(localelem, melem, true); 461*cb32e2e7SValeria Barra if (Max3(melem) / Min3(melem) <= 2) break; 462*cb32e2e7SValeria Barra } 463*cb32e2e7SValeria Barra 464*cb32e2e7SValeria Barra // Find my location in the process grid 465*cb32e2e7SValeria Barra ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr); 466*cb32e2e7SValeria Barra for (int d=0,rankleft=rank; d<dim; d++) { 467*cb32e2e7SValeria Barra const int pstride[3] = {p[1] *p[2], p[2], 1}; 468*cb32e2e7SValeria Barra irank[d] = rankleft / pstride[d]; 469*cb32e2e7SValeria Barra rankleft -= irank[d] * pstride[d]; 470*cb32e2e7SValeria Barra } 471*cb32e2e7SValeria Barra 472*cb32e2e7SValeria Barra GlobalNodes(p, irank, degree, melem, mnodes); 473*cb32e2e7SValeria Barra 474*cb32e2e7SValeria Barra // Setup global vector 475*cb32e2e7SValeria Barra ierr = VecCreate(comm, &X); CHKERRQ(ierr); 476*cb32e2e7SValeria Barra ierr = VecSetSizes(X, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, PETSC_DECIDE); 477*cb32e2e7SValeria Barra CHKERRQ(ierr); 478*cb32e2e7SValeria Barra ierr = VecSetUp(X); CHKERRQ(ierr); 479*cb32e2e7SValeria Barra 480*cb32e2e7SValeria Barra // Set up libCEED 481*cb32e2e7SValeria Barra CeedInit(ceedresource, &ceed); 482*cb32e2e7SValeria Barra 483*cb32e2e7SValeria Barra // Print summary 484*cb32e2e7SValeria Barra if (!test_mode) { 485*cb32e2e7SValeria Barra CeedInt gsize; 486*cb32e2e7SValeria Barra ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 487*cb32e2e7SValeria Barra const char *usedresource; 488*cb32e2e7SValeria Barra CeedGetResource(ceed, &usedresource); 489*cb32e2e7SValeria Barra ierr = PetscPrintf(comm, 490*cb32e2e7SValeria Barra "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n" 491*cb32e2e7SValeria Barra " libCEED:\n" 492*cb32e2e7SValeria Barra " libCEED Backend : %s\n" 493*cb32e2e7SValeria Barra " Mesh:\n" 494*cb32e2e7SValeria Barra " Number of 1D Basis Nodes (p) : %d\n" 495*cb32e2e7SValeria Barra " Number of 1D Quadrature Points (q) : %d\n" 496*cb32e2e7SValeria Barra " Global nodes : %D\n" 497*cb32e2e7SValeria Barra " Process Decomposition : %D %D %D\n" 498*cb32e2e7SValeria Barra " Local Elements : %D = %D %D %D\n" 499*cb32e2e7SValeria Barra " Owned nodes : %D = %D %D %D\n", 500*cb32e2e7SValeria Barra bpChoice+1, usedresource, P, Q, gsize/ncompu, p[0], 501*cb32e2e7SValeria Barra p[1], p[2], localelem, melem[0], melem[1], melem[2], 502*cb32e2e7SValeria Barra mnodes[0]*mnodes[1]*mnodes[2], mnodes[0], mnodes[1], 503*cb32e2e7SValeria Barra mnodes[2]); CHKERRQ(ierr); 504*cb32e2e7SValeria Barra } 505*cb32e2e7SValeria Barra 506*cb32e2e7SValeria Barra { 507*cb32e2e7SValeria Barra lsize = 1; 508*cb32e2e7SValeria Barra for (int d=0; d<dim; d++) { 509*cb32e2e7SValeria Barra lnodes[d] = melem[d]*degree + 1; 510*cb32e2e7SValeria Barra lsize *= lnodes[d]; 511*cb32e2e7SValeria Barra } 512*cb32e2e7SValeria Barra ierr = VecCreate(PETSC_COMM_SELF, &Xloc); CHKERRQ(ierr); 513*cb32e2e7SValeria Barra ierr = VecSetSizes(Xloc, lsize*ncompu, PETSC_DECIDE); CHKERRQ(ierr); 514*cb32e2e7SValeria Barra ierr = VecSetUp(Xloc); CHKERRQ(ierr); 515*cb32e2e7SValeria Barra 516*cb32e2e7SValeria Barra // Create local-to-global scatter 517*cb32e2e7SValeria Barra PetscInt *ltogind, *ltogind0, *locind, l0count; 518*cb32e2e7SValeria Barra IS ltogis, ltogis0, locis; 519*cb32e2e7SValeria Barra PetscInt gstart[2][2][2], gmnodes[2][2][2][dim]; 520*cb32e2e7SValeria Barra 521*cb32e2e7SValeria Barra for (int i=0; i<2; i++) { 522*cb32e2e7SValeria Barra for (int j=0; j<2; j++) { 523*cb32e2e7SValeria Barra for (int k=0; k<2; k++) { 524*cb32e2e7SValeria Barra PetscInt ijkrank[3] = {irank[0]+i, irank[1]+j, irank[2]+k}; 525*cb32e2e7SValeria Barra gstart[i][j][k] = GlobalStart(p, ijkrank, degree, melem); 526*cb32e2e7SValeria Barra GlobalNodes(p, ijkrank, degree, melem, gmnodes[i][j][k]); 527*cb32e2e7SValeria Barra } 528*cb32e2e7SValeria Barra } 529*cb32e2e7SValeria Barra } 530*cb32e2e7SValeria Barra 531*cb32e2e7SValeria Barra ierr = PetscMalloc1(lsize, <ogind); CHKERRQ(ierr); 532*cb32e2e7SValeria Barra ierr = PetscMalloc1(lsize, <ogind0); CHKERRQ(ierr); 533*cb32e2e7SValeria Barra ierr = PetscMalloc1(lsize, &locind); CHKERRQ(ierr); 534*cb32e2e7SValeria Barra l0count = 0; 535*cb32e2e7SValeria Barra for (PetscInt i=0,ir,ii; ir=i>=mnodes[0], ii=i-ir*mnodes[0], i<lnodes[0]; i++) 536*cb32e2e7SValeria Barra for (PetscInt j=0,jr,jj; jr=j>=mnodes[1], jj=j-jr*mnodes[1], j<lnodes[1]; j++) 537*cb32e2e7SValeria Barra for (PetscInt k=0,kr,kk; kr=k>=mnodes[2], kk=k-kr*mnodes[2], k<lnodes[2]; k++) { 538*cb32e2e7SValeria Barra PetscInt here = (i*lnodes[1]+j)*lnodes[2]+k; 539*cb32e2e7SValeria Barra ltogind[here] = 540*cb32e2e7SValeria Barra gstart[ir][jr][kr] + (ii*gmnodes[ir][jr][kr][1]+jj)*gmnodes[ir][jr][kr][2]+kk; 541*cb32e2e7SValeria Barra if ((irank[0] == 0 && i == 0) 542*cb32e2e7SValeria Barra || (irank[1] == 0 && j == 0) 543*cb32e2e7SValeria Barra || (irank[2] == 0 && k == 0) 544*cb32e2e7SValeria Barra || (irank[0]+1 == p[0] && i+1 == lnodes[0]) 545*cb32e2e7SValeria Barra || (irank[1]+1 == p[1] && j+1 == lnodes[1]) 546*cb32e2e7SValeria Barra || (irank[2]+1 == p[2] && k+1 == lnodes[2])) 547*cb32e2e7SValeria Barra continue; 548*cb32e2e7SValeria Barra ltogind0[l0count] = ltogind[here]; 549*cb32e2e7SValeria Barra locind[l0count++] = here; 550*cb32e2e7SValeria Barra } 551*cb32e2e7SValeria Barra ierr = ISCreateBlock(comm, ncompu, lsize, ltogind, PETSC_OWN_POINTER, 552*cb32e2e7SValeria Barra <ogis); CHKERRQ(ierr); 553*cb32e2e7SValeria Barra ierr = VecScatterCreate(Xloc, NULL, X, ltogis, <og); CHKERRQ(ierr); 554*cb32e2e7SValeria Barra CHKERRQ(ierr); 555*cb32e2e7SValeria Barra ierr = ISCreateBlock(comm, ncompu, l0count, ltogind0, PETSC_OWN_POINTER, 556*cb32e2e7SValeria Barra <ogis0); CHKERRQ(ierr); 557*cb32e2e7SValeria Barra ierr = ISCreateBlock(comm, ncompu, l0count, locind, PETSC_OWN_POINTER, 558*cb32e2e7SValeria Barra &locis); CHKERRQ(ierr); 559*cb32e2e7SValeria Barra ierr = VecScatterCreate(Xloc, locis, X, ltogis0, <og0); CHKERRQ(ierr); 560*cb32e2e7SValeria Barra { 561*cb32e2e7SValeria Barra // Create global-to-global scatter for Dirichlet values (everything not in 562*cb32e2e7SValeria Barra // ltogis0, which is the range of ltog0) 563*cb32e2e7SValeria Barra PetscInt xstart, xend, *indD, countD = 0; 564*cb32e2e7SValeria Barra IS isD; 565*cb32e2e7SValeria Barra const PetscScalar *x; 566*cb32e2e7SValeria Barra ierr = VecZeroEntries(Xloc); CHKERRQ(ierr); 567*cb32e2e7SValeria Barra ierr = VecSet(X, 1.0); CHKERRQ(ierr); 568*cb32e2e7SValeria Barra ierr = VecScatterBegin(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 569*cb32e2e7SValeria Barra CHKERRQ(ierr); 570*cb32e2e7SValeria Barra ierr = VecScatterEnd(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 571*cb32e2e7SValeria Barra CHKERRQ(ierr); 572*cb32e2e7SValeria Barra ierr = VecGetOwnershipRange(X, &xstart, &xend); CHKERRQ(ierr); 573*cb32e2e7SValeria Barra ierr = PetscMalloc1(xend-xstart, &indD); CHKERRQ(ierr); 574*cb32e2e7SValeria Barra ierr = VecGetArrayRead(X, &x); CHKERRQ(ierr); 575*cb32e2e7SValeria Barra for (PetscInt i=0; i<xend-xstart; i++) { 576*cb32e2e7SValeria Barra if (x[i] == 1.) indD[countD++] = xstart + i; 577*cb32e2e7SValeria Barra } 578*cb32e2e7SValeria Barra ierr = VecRestoreArrayRead(X, &x); CHKERRQ(ierr); 579*cb32e2e7SValeria Barra ierr = ISCreateGeneral(comm, countD, indD, PETSC_COPY_VALUES, &isD); 580*cb32e2e7SValeria Barra CHKERRQ(ierr); 581*cb32e2e7SValeria Barra ierr = PetscFree(indD); CHKERRQ(ierr); 582*cb32e2e7SValeria Barra ierr = VecScatterCreate(X, isD, X, isD, >ogD); CHKERRQ(ierr); 583*cb32e2e7SValeria Barra ierr = ISDestroy(&isD); CHKERRQ(ierr); 584*cb32e2e7SValeria Barra } 585*cb32e2e7SValeria Barra ierr = ISDestroy(<ogis); CHKERRQ(ierr); 586*cb32e2e7SValeria Barra ierr = ISDestroy(<ogis0); CHKERRQ(ierr); 587*cb32e2e7SValeria Barra ierr = ISDestroy(&locis); CHKERRQ(ierr); 588*cb32e2e7SValeria Barra } 589*cb32e2e7SValeria Barra 590*cb32e2e7SValeria Barra // CEED bases 591*cb32e2e7SValeria Barra CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompu, P, Q, 592*cb32e2e7SValeria Barra bpOptions[bpChoice].qmode, &basisu); 593*cb32e2e7SValeria Barra CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompx, 2, Q, 594*cb32e2e7SValeria Barra bpOptions[bpChoice].qmode, &basisx); 595*cb32e2e7SValeria Barra 596*cb32e2e7SValeria Barra // CEED restrictions 597*cb32e2e7SValeria Barra CreateRestriction(ceed, melem, P, ncompu, &Erestrictu); 598*cb32e2e7SValeria Barra CreateRestriction(ceed, melem, 2, dim, &Erestrictx); 599*cb32e2e7SValeria Barra CeedInt nelem = melem[0]*melem[1]*melem[2]; 600*cb32e2e7SValeria Barra CeedElemRestrictionCreateIdentity(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, ncompu, 601*cb32e2e7SValeria Barra &Erestrictui); 602*cb32e2e7SValeria Barra CeedElemRestrictionCreateIdentity(ceed, nelem, 603*cb32e2e7SValeria Barra Q*Q*Q, 604*cb32e2e7SValeria Barra nelem*Q*Q*Q, 605*cb32e2e7SValeria Barra bpOptions[bpChoice].qdatasize, &Erestrictqdi); 606*cb32e2e7SValeria Barra CeedElemRestrictionCreateIdentity(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, 1, 607*cb32e2e7SValeria Barra &Erestrictxi); 608*cb32e2e7SValeria Barra { 609*cb32e2e7SValeria Barra CeedScalar *xloc; 610*cb32e2e7SValeria Barra CeedInt shape[3] = {melem[0]+1, melem[1]+1, melem[2]+1}, len = 611*cb32e2e7SValeria Barra shape[0]*shape[1]*shape[2]; 612*cb32e2e7SValeria Barra xloc = malloc(len*ncompx*sizeof xloc[0]); 613*cb32e2e7SValeria Barra for (CeedInt i=0; i<shape[0]; i++) { 614*cb32e2e7SValeria Barra for (CeedInt j=0; j<shape[1]; j++) { 615*cb32e2e7SValeria Barra for (CeedInt k=0; k<shape[2]; k++) { 616*cb32e2e7SValeria Barra xloc[((i*shape[1]+j)*shape[2]+k) + 0*len] = 1.*(irank[0]*melem[0]+i) / 617*cb32e2e7SValeria Barra (p[0]*melem[0]); 618*cb32e2e7SValeria Barra xloc[((i*shape[1]+j)*shape[2]+k) + 1*len] = 1.*(irank[1]*melem[1]+j) / 619*cb32e2e7SValeria Barra (p[1]*melem[1]); 620*cb32e2e7SValeria Barra xloc[((i*shape[1]+j)*shape[2]+k) + 2*len] = 1.*(irank[2]*melem[2]+k) / 621*cb32e2e7SValeria Barra (p[2]*melem[2]); 622*cb32e2e7SValeria Barra } 623*cb32e2e7SValeria Barra } 624*cb32e2e7SValeria Barra } 625*cb32e2e7SValeria Barra CeedVectorCreate(ceed, len*ncompx, &xcoord); 626*cb32e2e7SValeria Barra CeedVectorSetArray(xcoord, CEED_MEM_HOST, CEED_OWN_POINTER, xloc); 627*cb32e2e7SValeria Barra } 628*cb32e2e7SValeria Barra 629*cb32e2e7SValeria Barra // Create the Qfunction that builds the operator quadrature data 630*cb32e2e7SValeria Barra CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].setupgeo, 631*cb32e2e7SValeria Barra bpOptions[bpChoice].setupgeofname, &qf_setupgeo); 632*cb32e2e7SValeria Barra CeedQFunctionAddInput(qf_setupgeo, "dx", ncompx*dim, CEED_EVAL_GRAD); 633*cb32e2e7SValeria Barra CeedQFunctionAddInput(qf_setupgeo, "weight", 1, CEED_EVAL_WEIGHT); 634*cb32e2e7SValeria Barra CeedQFunctionAddOutput(qf_setupgeo, "qdata", bpOptions[bpChoice].qdatasize, 635*cb32e2e7SValeria Barra CEED_EVAL_NONE); 636*cb32e2e7SValeria Barra 637*cb32e2e7SValeria Barra // Create the Qfunction that sets up the RHS and true solution 638*cb32e2e7SValeria Barra CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].setuprhs, 639*cb32e2e7SValeria Barra bpOptions[bpChoice].setuprhsfname, &qf_setuprhs); 640*cb32e2e7SValeria Barra CeedQFunctionAddInput(qf_setuprhs, "x", ncompx, CEED_EVAL_INTERP); 641*cb32e2e7SValeria Barra CeedQFunctionAddInput(qf_setuprhs, "dx", ncompx*dim, CEED_EVAL_GRAD); 642*cb32e2e7SValeria Barra CeedQFunctionAddInput(qf_setuprhs, "weight", 1, CEED_EVAL_WEIGHT); 643*cb32e2e7SValeria Barra CeedQFunctionAddOutput(qf_setuprhs, "true_soln", ncompu, CEED_EVAL_NONE); 644*cb32e2e7SValeria Barra CeedQFunctionAddOutput(qf_setuprhs, "rhs", ncompu, CEED_EVAL_INTERP); 645*cb32e2e7SValeria Barra 646*cb32e2e7SValeria Barra // Set up PDE operator 647*cb32e2e7SValeria Barra CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].apply, 648*cb32e2e7SValeria Barra bpOptions[bpChoice].applyfname, &qf_apply); 649*cb32e2e7SValeria Barra // Add inputs and outputs 650*cb32e2e7SValeria Barra CeedInt inscale = bpOptions[bpChoice].inmode==CEED_EVAL_GRAD ? 3 : 1; 651*cb32e2e7SValeria Barra CeedInt outscale = bpOptions[bpChoice].outmode==CEED_EVAL_GRAD ? 3 : 1; 652*cb32e2e7SValeria Barra CeedQFunctionAddInput(qf_apply, "u", ncompu*inscale, 653*cb32e2e7SValeria Barra bpOptions[bpChoice].inmode); 654*cb32e2e7SValeria Barra CeedQFunctionAddInput(qf_apply, "qdata", bpOptions[bpChoice].qdatasize, 655*cb32e2e7SValeria Barra CEED_EVAL_NONE); 656*cb32e2e7SValeria Barra CeedQFunctionAddOutput(qf_apply, "v", ncompu*outscale, 657*cb32e2e7SValeria Barra bpOptions[bpChoice].outmode); 658*cb32e2e7SValeria Barra 659*cb32e2e7SValeria Barra // Create the error qfunction 660*cb32e2e7SValeria Barra CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].error, 661*cb32e2e7SValeria Barra bpOptions[bpChoice].errorfname, &qf_error); 662*cb32e2e7SValeria Barra CeedQFunctionAddInput(qf_error, "u", ncompu, CEED_EVAL_INTERP); 663*cb32e2e7SValeria Barra CeedQFunctionAddInput(qf_error, "true_soln", ncompu, CEED_EVAL_NONE); 664*cb32e2e7SValeria Barra CeedQFunctionAddOutput(qf_error, "error", ncompu, CEED_EVAL_NONE); 665*cb32e2e7SValeria Barra 666*cb32e2e7SValeria Barra // Create the persistent vectors that will be needed in setup 667*cb32e2e7SValeria Barra CeedInt nqpts; 668*cb32e2e7SValeria Barra CeedBasisGetNumQuadraturePoints(basisu, &nqpts); 669*cb32e2e7SValeria Barra CeedVectorCreate(ceed, bpOptions[bpChoice].qdatasize*nelem*nqpts, &qdata); 670*cb32e2e7SValeria Barra CeedVectorCreate(ceed, nelem*nqpts*ncompu, &target); 671*cb32e2e7SValeria Barra CeedVectorCreate(ceed, lsize*ncompu, &rhsceed); 672*cb32e2e7SValeria Barra 673*cb32e2e7SValeria Barra // Create the operator that builds the quadrature data for the ceed operator 674*cb32e2e7SValeria Barra CeedOperatorCreate(ceed, qf_setupgeo, NULL, NULL, &op_setupgeo); 675*cb32e2e7SValeria Barra CeedOperatorSetField(op_setupgeo, "dx", Erestrictx, CEED_NOTRANSPOSE, 676*cb32e2e7SValeria Barra basisx, CEED_VECTOR_ACTIVE); 677*cb32e2e7SValeria Barra CeedOperatorSetField(op_setupgeo, "weight", Erestrictxi, CEED_NOTRANSPOSE, 678*cb32e2e7SValeria Barra basisx, CEED_VECTOR_NONE); 679*cb32e2e7SValeria Barra CeedOperatorSetField(op_setupgeo, "qdata", Erestrictqdi, CEED_NOTRANSPOSE, 680*cb32e2e7SValeria Barra CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 681*cb32e2e7SValeria Barra 682*cb32e2e7SValeria Barra // Create the operator that builds the RHS and true solution 683*cb32e2e7SValeria Barra CeedOperatorCreate(ceed, qf_setuprhs, NULL, NULL, &op_setuprhs); 684*cb32e2e7SValeria Barra CeedOperatorSetField(op_setuprhs, "x", Erestrictx, CEED_NOTRANSPOSE, 685*cb32e2e7SValeria Barra basisx, CEED_VECTOR_ACTIVE); 686*cb32e2e7SValeria Barra CeedOperatorSetField(op_setuprhs, "dx", Erestrictx, CEED_NOTRANSPOSE, 687*cb32e2e7SValeria Barra basisx, CEED_VECTOR_ACTIVE); 688*cb32e2e7SValeria Barra CeedOperatorSetField(op_setuprhs, "weight", Erestrictxi, CEED_NOTRANSPOSE, 689*cb32e2e7SValeria Barra basisx, CEED_VECTOR_NONE); 690*cb32e2e7SValeria Barra CeedOperatorSetField(op_setuprhs, "true_soln", Erestrictui, CEED_NOTRANSPOSE, 691*cb32e2e7SValeria Barra CEED_BASIS_COLLOCATED, target); 692*cb32e2e7SValeria Barra CeedOperatorSetField(op_setuprhs, "rhs", Erestrictu, CEED_TRANSPOSE, 693*cb32e2e7SValeria Barra basisu, CEED_VECTOR_ACTIVE); 694*cb32e2e7SValeria Barra 695*cb32e2e7SValeria Barra // Create the mass or diff operator 696*cb32e2e7SValeria Barra CeedOperatorCreate(ceed, qf_apply, NULL, NULL, &op_apply); 697*cb32e2e7SValeria Barra CeedOperatorSetField(op_apply, "u", Erestrictu, CEED_TRANSPOSE, 698*cb32e2e7SValeria Barra basisu, CEED_VECTOR_ACTIVE); 699*cb32e2e7SValeria Barra CeedOperatorSetField(op_apply, "qdata", Erestrictqdi, CEED_NOTRANSPOSE, 700*cb32e2e7SValeria Barra CEED_BASIS_COLLOCATED, qdata); 701*cb32e2e7SValeria Barra CeedOperatorSetField(op_apply, "v", Erestrictu, CEED_TRANSPOSE, 702*cb32e2e7SValeria Barra basisu, CEED_VECTOR_ACTIVE); 703*cb32e2e7SValeria Barra 704*cb32e2e7SValeria Barra // Create the error operator 705*cb32e2e7SValeria Barra CeedOperatorCreate(ceed, qf_error, NULL, NULL, &op_error); 706*cb32e2e7SValeria Barra CeedOperatorSetField(op_error, "u", Erestrictu, CEED_TRANSPOSE, 707*cb32e2e7SValeria Barra basisu, CEED_VECTOR_ACTIVE); 708*cb32e2e7SValeria Barra CeedOperatorSetField(op_error, "true_soln", Erestrictui, CEED_NOTRANSPOSE, 709*cb32e2e7SValeria Barra CEED_BASIS_COLLOCATED, target); 710*cb32e2e7SValeria Barra CeedOperatorSetField(op_error, "error", Erestrictui, CEED_NOTRANSPOSE, 711*cb32e2e7SValeria Barra CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 712*cb32e2e7SValeria Barra 713*cb32e2e7SValeria Barra // Set up Mat 714*cb32e2e7SValeria Barra ierr = PetscMalloc1(1, &user); CHKERRQ(ierr); 715*cb32e2e7SValeria Barra user->comm = comm; 716*cb32e2e7SValeria Barra user->ltog = ltog; 717*cb32e2e7SValeria Barra if (bpChoice != CEED_BP1 && bpChoice != CEED_BP2) { 718*cb32e2e7SValeria Barra user->ltog0 = ltog0; 719*cb32e2e7SValeria Barra user->gtogD = gtogD; 720*cb32e2e7SValeria Barra } 721*cb32e2e7SValeria Barra user->Xloc = Xloc; 722*cb32e2e7SValeria Barra ierr = VecDuplicate(Xloc, &user->Yloc); CHKERRQ(ierr); 723*cb32e2e7SValeria Barra CeedVectorCreate(ceed, lsize*ncompu, &user->xceed); 724*cb32e2e7SValeria Barra CeedVectorCreate(ceed, lsize*ncompu, &user->yceed); 725*cb32e2e7SValeria Barra user->op = op_apply; 726*cb32e2e7SValeria Barra user->qdata = qdata; 727*cb32e2e7SValeria Barra user->ceed = ceed; 728*cb32e2e7SValeria Barra 729*cb32e2e7SValeria Barra ierr = MatCreateShell(comm, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, 730*cb32e2e7SValeria Barra mnodes[0]*mnodes[1]*mnodes[2]*ncompu, 731*cb32e2e7SValeria Barra PETSC_DECIDE, PETSC_DECIDE, user, &mat); CHKERRQ(ierr); 732*cb32e2e7SValeria Barra if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) { 733*cb32e2e7SValeria Barra ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Mass); 734*cb32e2e7SValeria Barra CHKERRQ(ierr); 735*cb32e2e7SValeria Barra } else { 736*cb32e2e7SValeria Barra ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Diff); 737*cb32e2e7SValeria Barra CHKERRQ(ierr); 738*cb32e2e7SValeria Barra } 739*cb32e2e7SValeria Barra ierr = MatCreateVecs(mat, &rhs, NULL); CHKERRQ(ierr); 740*cb32e2e7SValeria Barra 741*cb32e2e7SValeria Barra // Get RHS vector 742*cb32e2e7SValeria Barra ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr); 743*cb32e2e7SValeria Barra ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr); 744*cb32e2e7SValeria Barra ierr = VecGetArray(rhsloc, &r); CHKERRQ(ierr); 745*cb32e2e7SValeria Barra CeedVectorSetArray(rhsceed, CEED_MEM_HOST, CEED_USE_POINTER, r); 746*cb32e2e7SValeria Barra 747*cb32e2e7SValeria Barra // Setup qdata, rhs, and target 748*cb32e2e7SValeria Barra CeedOperatorApply(op_setupgeo, xcoord, qdata, CEED_REQUEST_IMMEDIATE); 749*cb32e2e7SValeria Barra CeedOperatorApply(op_setuprhs, xcoord, rhsceed, CEED_REQUEST_IMMEDIATE); 750*cb32e2e7SValeria Barra ierr = CeedVectorSyncArray(rhsceed, CEED_MEM_HOST); CHKERRQ(ierr); 751*cb32e2e7SValeria Barra CeedVectorDestroy(&xcoord); 752*cb32e2e7SValeria Barra 753*cb32e2e7SValeria Barra // Gather RHS 754*cb32e2e7SValeria Barra ierr = VecRestoreArray(rhsloc, &r); CHKERRQ(ierr); 755*cb32e2e7SValeria Barra ierr = VecZeroEntries(rhs); CHKERRQ(ierr); 756*cb32e2e7SValeria Barra ierr = VecScatterBegin(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 757*cb32e2e7SValeria Barra CHKERRQ(ierr); 758*cb32e2e7SValeria Barra ierr = VecScatterEnd(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 759*cb32e2e7SValeria Barra CHKERRQ(ierr); 760*cb32e2e7SValeria Barra CeedVectorDestroy(&rhsceed); 761*cb32e2e7SValeria Barra 762*cb32e2e7SValeria Barra ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr); 763*cb32e2e7SValeria Barra { 764*cb32e2e7SValeria Barra PC pc; 765*cb32e2e7SValeria Barra ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 766*cb32e2e7SValeria Barra if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) { 767*cb32e2e7SValeria Barra ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 768*cb32e2e7SValeria Barra ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr); 769*cb32e2e7SValeria Barra } else { 770*cb32e2e7SValeria Barra ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr); 771*cb32e2e7SValeria Barra } 772*cb32e2e7SValeria Barra ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 773*cb32e2e7SValeria Barra ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 774*cb32e2e7SValeria Barra ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 775*cb32e2e7SValeria Barra PETSC_DEFAULT); CHKERRQ(ierr); 776*cb32e2e7SValeria Barra } 777*cb32e2e7SValeria Barra ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); 778*cb32e2e7SValeria Barra ierr = KSPSetOperators(ksp, mat, mat); CHKERRQ(ierr); 779*cb32e2e7SValeria Barra // First run, if benchmarking 780*cb32e2e7SValeria Barra if (benchmark_mode) { 781*cb32e2e7SValeria Barra ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); 782*cb32e2e7SValeria Barra CHKERRQ(ierr); 783*cb32e2e7SValeria Barra my_rt_start = MPI_Wtime(); 784*cb32e2e7SValeria Barra ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 785*cb32e2e7SValeria Barra my_rt = MPI_Wtime() - my_rt_start; 786*cb32e2e7SValeria Barra ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm); 787*cb32e2e7SValeria Barra CHKERRQ(ierr); 788*cb32e2e7SValeria Barra // Set maxits based on first iteration timing 789*cb32e2e7SValeria Barra if (my_rt > 0.02) { 790*cb32e2e7SValeria Barra ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5); 791*cb32e2e7SValeria Barra CHKERRQ(ierr); 792*cb32e2e7SValeria Barra } else { 793*cb32e2e7SValeria Barra ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20); 794*cb32e2e7SValeria Barra CHKERRQ(ierr); 795*cb32e2e7SValeria Barra } 796*cb32e2e7SValeria Barra } 797*cb32e2e7SValeria Barra // Timed solve 798*cb32e2e7SValeria Barra ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr); 799*cb32e2e7SValeria Barra my_rt_start = MPI_Wtime(); 800*cb32e2e7SValeria Barra ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 801*cb32e2e7SValeria Barra my_rt = MPI_Wtime() - my_rt_start; 802*cb32e2e7SValeria Barra { 803*cb32e2e7SValeria Barra KSPType ksptype; 804*cb32e2e7SValeria Barra KSPConvergedReason reason; 805*cb32e2e7SValeria Barra PetscReal rnorm; 806*cb32e2e7SValeria Barra PetscInt its; 807*cb32e2e7SValeria Barra ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr); 808*cb32e2e7SValeria Barra ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); 809*cb32e2e7SValeria Barra ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); 810*cb32e2e7SValeria Barra ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); 811*cb32e2e7SValeria Barra if (!test_mode || reason < 0 || rnorm > 1e-8) { 812*cb32e2e7SValeria Barra ierr = PetscPrintf(comm, 813*cb32e2e7SValeria Barra " KSP:\n" 814*cb32e2e7SValeria Barra " KSP Type : %s\n" 815*cb32e2e7SValeria Barra " KSP Convergence : %s\n" 816*cb32e2e7SValeria Barra " Total KSP Iterations : %D\n" 817*cb32e2e7SValeria Barra " Final rnorm : %e\n", 818*cb32e2e7SValeria Barra ksptype, KSPConvergedReasons[reason], its, 819*cb32e2e7SValeria Barra (double)rnorm); CHKERRQ(ierr); 820*cb32e2e7SValeria Barra } 821*cb32e2e7SValeria Barra if (benchmark_mode && (!test_mode)) { 822*cb32e2e7SValeria Barra CeedInt gsize; 823*cb32e2e7SValeria Barra ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 824*cb32e2e7SValeria Barra ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm); 825*cb32e2e7SValeria Barra CHKERRQ(ierr); 826*cb32e2e7SValeria Barra ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm); 827*cb32e2e7SValeria Barra CHKERRQ(ierr); 828*cb32e2e7SValeria Barra ierr = PetscPrintf(comm, 829*cb32e2e7SValeria Barra " Performance:\n" 830*cb32e2e7SValeria Barra " CG Solve Time : %g (%g) sec\n" 831*cb32e2e7SValeria Barra " DoFs/Sec in CG : %g (%g) million\n", 832*cb32e2e7SValeria Barra rt_max, rt_min, 1e-6*gsize*its/rt_max, 833*cb32e2e7SValeria Barra 1e-6*gsize*its/rt_min); CHKERRQ(ierr); 834*cb32e2e7SValeria Barra } 835*cb32e2e7SValeria Barra } 836*cb32e2e7SValeria Barra 837*cb32e2e7SValeria Barra { 838*cb32e2e7SValeria Barra PetscReal maxerror; 839*cb32e2e7SValeria Barra ierr = ComputeErrorMax(user, op_error, X, target, &maxerror); CHKERRQ(ierr); 840*cb32e2e7SValeria Barra PetscReal tol = (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) ? 5e-3 : 5e-2; 841*cb32e2e7SValeria Barra if (!test_mode || maxerror > tol) { 842*cb32e2e7SValeria Barra ierr = PetscPrintf(comm, 843*cb32e2e7SValeria Barra " Pointwise Error (max) : %e\n", 844*cb32e2e7SValeria Barra (double)maxerror); CHKERRQ(ierr); 845*cb32e2e7SValeria Barra } 846*cb32e2e7SValeria Barra } 847*cb32e2e7SValeria Barra 848*cb32e2e7SValeria Barra if (write_solution) { 849*cb32e2e7SValeria Barra PetscViewer vtkviewersoln; 850*cb32e2e7SValeria Barra 851*cb32e2e7SValeria Barra ierr = PetscViewerCreate(comm, &vtkviewersoln); CHKERRQ(ierr); 852*cb32e2e7SValeria Barra ierr = PetscViewerSetType(vtkviewersoln, PETSCVIEWERVTK); CHKERRQ(ierr); 853*cb32e2e7SValeria Barra ierr = PetscViewerFileSetName(vtkviewersoln, "solution.vtk"); CHKERRQ(ierr); 854*cb32e2e7SValeria Barra ierr = VecView(X, vtkviewersoln); CHKERRQ(ierr); 855*cb32e2e7SValeria Barra ierr = PetscViewerDestroy(&vtkviewersoln); CHKERRQ(ierr); 856*cb32e2e7SValeria Barra } 857*cb32e2e7SValeria Barra 858*cb32e2e7SValeria Barra ierr = VecDestroy(&rhs); CHKERRQ(ierr); 859*cb32e2e7SValeria Barra ierr = VecDestroy(&rhsloc); CHKERRQ(ierr); 860*cb32e2e7SValeria Barra ierr = VecDestroy(&X); CHKERRQ(ierr); 861*cb32e2e7SValeria Barra ierr = VecDestroy(&user->Xloc); CHKERRQ(ierr); 862*cb32e2e7SValeria Barra ierr = VecDestroy(&user->Yloc); CHKERRQ(ierr); 863*cb32e2e7SValeria Barra ierr = VecScatterDestroy(<og); CHKERRQ(ierr); 864*cb32e2e7SValeria Barra ierr = VecScatterDestroy(<og0); CHKERRQ(ierr); 865*cb32e2e7SValeria Barra ierr = VecScatterDestroy(>ogD); CHKERRQ(ierr); 866*cb32e2e7SValeria Barra ierr = MatDestroy(&mat); CHKERRQ(ierr); 867*cb32e2e7SValeria Barra ierr = KSPDestroy(&ksp); CHKERRQ(ierr); 868*cb32e2e7SValeria Barra 869*cb32e2e7SValeria Barra CeedVectorDestroy(&user->xceed); 870*cb32e2e7SValeria Barra CeedVectorDestroy(&user->yceed); 871*cb32e2e7SValeria Barra CeedVectorDestroy(&user->qdata); 872*cb32e2e7SValeria Barra CeedVectorDestroy(&target); 873*cb32e2e7SValeria Barra CeedOperatorDestroy(&op_setupgeo); 874*cb32e2e7SValeria Barra CeedOperatorDestroy(&op_setuprhs); 875*cb32e2e7SValeria Barra CeedOperatorDestroy(&op_apply); 876*cb32e2e7SValeria Barra CeedOperatorDestroy(&op_error); 877*cb32e2e7SValeria Barra CeedElemRestrictionDestroy(&Erestrictu); 878*cb32e2e7SValeria Barra CeedElemRestrictionDestroy(&Erestrictx); 879*cb32e2e7SValeria Barra CeedElemRestrictionDestroy(&Erestrictui); 880*cb32e2e7SValeria Barra CeedElemRestrictionDestroy(&Erestrictxi); 881*cb32e2e7SValeria Barra CeedElemRestrictionDestroy(&Erestrictqdi); 882*cb32e2e7SValeria Barra CeedQFunctionDestroy(&qf_setupgeo); 883*cb32e2e7SValeria Barra CeedQFunctionDestroy(&qf_setuprhs); 884*cb32e2e7SValeria Barra CeedQFunctionDestroy(&qf_apply); 885*cb32e2e7SValeria Barra CeedQFunctionDestroy(&qf_error); 886*cb32e2e7SValeria Barra CeedBasisDestroy(&basisu); 887*cb32e2e7SValeria Barra CeedBasisDestroy(&basisx); 888*cb32e2e7SValeria Barra CeedDestroy(&ceed); 889*cb32e2e7SValeria Barra ierr = PetscFree(user); CHKERRQ(ierr); 890*cb32e2e7SValeria Barra return PetscFinalize(); 891*cb32e2e7SValeria Barra } 892