1cb32e2e7SValeria Barra // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2cb32e2e7SValeria Barra // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3cb32e2e7SValeria Barra // reserved. See files LICENSE and NOTICE for details. 4cb32e2e7SValeria Barra // 5cb32e2e7SValeria Barra // This file is part of CEED, a collection of benchmarks, miniapps, software 6cb32e2e7SValeria Barra // libraries and APIs for efficient high-order finite element and spectral 7cb32e2e7SValeria Barra // element discretizations for exascale applications. For more information and 8cb32e2e7SValeria Barra // source code availability see http://github.com/ceed. 9cb32e2e7SValeria Barra // 10cb32e2e7SValeria Barra // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11cb32e2e7SValeria Barra // a collaborative effort of two U.S. Department of Energy organizations (Office 12cb32e2e7SValeria Barra // of Science and the National Nuclear Security Administration) responsible for 13cb32e2e7SValeria Barra // the planning and preparation of a capable exascale ecosystem, including 14cb32e2e7SValeria Barra // software, applications, hardware, advanced system engineering and early 15cb32e2e7SValeria Barra // testbed platforms, in support of the nation's exascale computing imperative. 16cb32e2e7SValeria Barra 17cb32e2e7SValeria Barra // libCEED + PETSc Example: CEED BPs 18cb32e2e7SValeria Barra // 19cb32e2e7SValeria Barra // This example demonstrates a simple usage of libCEED with PETSc to solve the 20cb32e2e7SValeria Barra // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. 21cb32e2e7SValeria Barra // 22cb32e2e7SValeria Barra // The code is intentionally "raw", using only low-level communication 23cb32e2e7SValeria Barra // primitives. 24cb32e2e7SValeria Barra // 25cb32e2e7SValeria Barra // Build with: 26cb32e2e7SValeria Barra // 27cb32e2e7SValeria Barra // make bpsraw [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 28cb32e2e7SValeria Barra // 29cb32e2e7SValeria Barra // Sample runs: 30cb32e2e7SValeria Barra // 3132d2ee49SValeria Barra // ./bpsraw -problem bp1 3232d2ee49SValeria Barra // ./bpsraw -problem bp2 -ceed /cpu/self 3332d2ee49SValeria Barra // ./bpsraw -problem bp3 -ceed /gpu/occa 3432d2ee49SValeria Barra // ./bpsraw -problem bp4 -ceed /cpu/occa 3532d2ee49SValeria Barra // ./bpsraw -problem bp5 -ceed /omp/occa 3632d2ee49SValeria Barra // ./bpsraw -problem bp6 -ceed /ocl/occa 37cb32e2e7SValeria Barra // 38cb32e2e7SValeria Barra //TESTARGS -ceed {ceed_resource} -test -problem bp2 -degree 5 -qextra 5 39cb32e2e7SValeria Barra 40cb32e2e7SValeria Barra /// @file 41cb32e2e7SValeria Barra /// CEED BPs example using PETSc 42cb32e2e7SValeria Barra /// See bps.c for an implementation using DMPlex unstructured grids. 43cb32e2e7SValeria Barra const char help[] = "Solve CEED BPs using PETSc\n"; 44cb32e2e7SValeria Barra 45cb32e2e7SValeria Barra #include <stdbool.h> 46cb32e2e7SValeria Barra #include <string.h> 47cb32e2e7SValeria Barra #include <petscksp.h> 48cb32e2e7SValeria Barra #include <ceed.h> 49cb32e2e7SValeria Barra #include "qfunctions/bps/common.h" 50cb32e2e7SValeria Barra #include "qfunctions/bps/bp1.h" 51cb32e2e7SValeria Barra #include "qfunctions/bps/bp2.h" 52cb32e2e7SValeria Barra #include "qfunctions/bps/bp3.h" 53cb32e2e7SValeria Barra #include "qfunctions/bps/bp4.h" 54cb32e2e7SValeria Barra 55*9396343dSjeremylt #include <petscsys.h> 56*9396343dSjeremylt #if PETSC_VERSION_LT(3,12,0) 57*9396343dSjeremylt #ifdef PETSC_HAVE_CUDA 58*9396343dSjeremylt #include <petsccuda.h> 59*9396343dSjeremylt // Note: With PETSc prior to version 3.12.0, providing the source path to 60*9396343dSjeremylt // include 'cublas_v2.h' will be needed to use 'petsccuda.h'. 61*9396343dSjeremylt #endif 62*9396343dSjeremylt #endif 63*9396343dSjeremylt 64cb32e2e7SValeria Barra static void Split3(PetscInt size, PetscInt m[3], bool reverse) { 65cb32e2e7SValeria Barra for (PetscInt d=0,sizeleft=size; d<3; d++) { 66cb32e2e7SValeria Barra PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d))); 67cb32e2e7SValeria Barra while (try * (sizeleft / try) != sizeleft) try++; 68cb32e2e7SValeria Barra m[reverse ? 2-d : d] = try; 69cb32e2e7SValeria Barra sizeleft /= try; 70cb32e2e7SValeria Barra } 71cb32e2e7SValeria Barra } 72cb32e2e7SValeria Barra 73cb32e2e7SValeria Barra static PetscInt Max3(const PetscInt a[3]) { 74cb32e2e7SValeria Barra return PetscMax(a[0], PetscMax(a[1], a[2])); 75cb32e2e7SValeria Barra } 76cb32e2e7SValeria Barra static PetscInt Min3(const PetscInt a[3]) { 77cb32e2e7SValeria Barra return PetscMin(a[0], PetscMin(a[1], a[2])); 78cb32e2e7SValeria Barra } 79cb32e2e7SValeria Barra static void GlobalNodes(const PetscInt p[3], const PetscInt irank[3], 80cb32e2e7SValeria Barra PetscInt degree, const PetscInt melem[3], 81cb32e2e7SValeria Barra PetscInt mnodes[3]) { 82cb32e2e7SValeria Barra for (int d=0; d<3; d++) 83cb32e2e7SValeria Barra mnodes[d] = degree*melem[d] + (irank[d] == p[d]-1); 84cb32e2e7SValeria Barra } 85cb32e2e7SValeria Barra static PetscInt GlobalStart(const PetscInt p[3], const PetscInt irank[3], 86cb32e2e7SValeria Barra PetscInt degree, const PetscInt melem[3]) { 87cb32e2e7SValeria Barra PetscInt start = 0; 88cb32e2e7SValeria Barra // Dumb brute-force is easier to read 89cb32e2e7SValeria Barra for (PetscInt i=0; i<p[0]; i++) { 90cb32e2e7SValeria Barra for (PetscInt j=0; j<p[1]; j++) { 91cb32e2e7SValeria Barra for (PetscInt k=0; k<p[2]; k++) { 92cb32e2e7SValeria Barra PetscInt mnodes[3], ijkrank[] = {i,j,k}; 93cb32e2e7SValeria Barra if (i == irank[0] && j == irank[1] && k == irank[2]) return start; 94cb32e2e7SValeria Barra GlobalNodes(p, ijkrank, degree, melem, mnodes); 95cb32e2e7SValeria Barra start += mnodes[0] * mnodes[1] * mnodes[2]; 96cb32e2e7SValeria Barra } 97cb32e2e7SValeria Barra } 98cb32e2e7SValeria Barra } 99cb32e2e7SValeria Barra return -1; 100cb32e2e7SValeria Barra } 10161dbc9d2Sjeremylt static int CreateRestriction(Ceed ceed, CeedInterlaceMode imode, 102a8d32208Sjeremylt const CeedInt melem[3], CeedInt P, CeedInt ncomp, 103cb32e2e7SValeria Barra CeedElemRestriction *Erestrict) { 104cb32e2e7SValeria Barra const PetscInt nelem = melem[0]*melem[1]*melem[2]; 105cb32e2e7SValeria Barra PetscInt mnodes[3], *idx, *idxp; 106cb32e2e7SValeria Barra 107cb32e2e7SValeria Barra // Get indicies 108cb32e2e7SValeria Barra for (int d=0; d<3; d++) mnodes[d] = melem[d]*(P-1) + 1; 109cb32e2e7SValeria Barra idxp = idx = malloc(nelem*P*P*P*sizeof idx[0]); 110cb32e2e7SValeria Barra for (CeedInt i=0; i<melem[0]; i++) 111cb32e2e7SValeria Barra for (CeedInt j=0; j<melem[1]; j++) 112cb32e2e7SValeria Barra for (CeedInt k=0; k<melem[2]; k++,idxp += P*P*P) 113cb32e2e7SValeria Barra for (CeedInt ii=0; ii<P; ii++) 114cb32e2e7SValeria Barra for (CeedInt jj=0; jj<P; jj++) 115cb32e2e7SValeria Barra for (CeedInt kk=0; kk<P; kk++) { 116cb32e2e7SValeria Barra if (0) { // This is the C-style (i,j,k) ordering that I prefer 117cb32e2e7SValeria Barra idxp[(ii*P+jj)*P+kk] = (((i*(P-1)+ii)*mnodes[1] 118cb32e2e7SValeria Barra + (j*(P-1)+jj))*mnodes[2] 119cb32e2e7SValeria Barra + (k*(P-1)+kk)); 120cb32e2e7SValeria Barra } else { // (k,j,i) ordering for consistency with MFEM example 121cb32e2e7SValeria Barra idxp[ii+P*(jj+P*kk)] = (((i*(P-1)+ii)*mnodes[1] 122cb32e2e7SValeria Barra + (j*(P-1)+jj))*mnodes[2] 123cb32e2e7SValeria Barra + (k*(P-1)+kk)); 124cb32e2e7SValeria Barra } 125cb32e2e7SValeria Barra } 126cb32e2e7SValeria Barra 127cb32e2e7SValeria Barra // Setup CEED restriction 12861dbc9d2Sjeremylt CeedElemRestrictionCreate(ceed, imode, nelem, P*P*P, 129a8d32208Sjeremylt mnodes[0]*mnodes[1]*mnodes[2], ncomp, 130a8d32208Sjeremylt CEED_MEM_HOST, CEED_OWN_POINTER, idx, Erestrict); 131cb32e2e7SValeria Barra 132cb32e2e7SValeria Barra PetscFunctionReturn(0); 133cb32e2e7SValeria Barra } 134cb32e2e7SValeria Barra 135cb32e2e7SValeria Barra // Data for PETSc 136cb32e2e7SValeria Barra typedef struct User_ *User; 137cb32e2e7SValeria Barra struct User_ { 138cb32e2e7SValeria Barra MPI_Comm comm; 139cb32e2e7SValeria Barra VecScatter ltog; // Scatter for all entries 140cb32e2e7SValeria Barra VecScatter ltog0; // Skip Dirichlet values 141cb32e2e7SValeria Barra VecScatter gtogD; // global-to-global; only Dirichlet values 142cb32e2e7SValeria Barra Vec Xloc, Yloc; 143cb32e2e7SValeria Barra CeedVector xceed, yceed; 144cb32e2e7SValeria Barra CeedOperator op; 145cb32e2e7SValeria Barra CeedVector qdata; 146cb32e2e7SValeria Barra Ceed ceed; 147*9396343dSjeremylt CeedMemType memtype; 148*9396343dSjeremylt int (*VecGetArray)(Vec, PetscScalar **); 149*9396343dSjeremylt int (*VecGetArrayRead)(Vec, const PetscScalar **); 150*9396343dSjeremylt int (*VecRestoreArray)(Vec, PetscScalar **); 151*9396343dSjeremylt int (*VecRestoreArrayRead)(Vec, const PetscScalar **); 152*9396343dSjeremylt }; 153*9396343dSjeremylt 154*9396343dSjeremylt // MemType Options 155*9396343dSjeremylt static const char *const memTypes[] = {"host","device","memType", 156*9396343dSjeremylt "CEED_MEM_",0 157cb32e2e7SValeria Barra }; 158cb32e2e7SValeria Barra 159cb32e2e7SValeria Barra // BP Options 160cb32e2e7SValeria Barra typedef enum { 161cb32e2e7SValeria Barra CEED_BP1 = 0, CEED_BP2 = 1, CEED_BP3 = 2, 162cb32e2e7SValeria Barra CEED_BP4 = 3, CEED_BP5 = 4, CEED_BP6 = 5 163cb32e2e7SValeria Barra } bpType; 164cb32e2e7SValeria Barra static const char *const bpTypes[] = {"bp1","bp2","bp3","bp4","bp5","bp6", 165cb32e2e7SValeria Barra "bpType","CEED_BP",0 166cb32e2e7SValeria Barra }; 167cb32e2e7SValeria Barra 168cb32e2e7SValeria Barra // BP specific data 169cb32e2e7SValeria Barra typedef struct { 170cb32e2e7SValeria Barra CeedInt ncompu, qdatasize, qextra; 171cb32e2e7SValeria Barra CeedQFunctionUser setupgeo, setuprhs, apply, error; 172cb32e2e7SValeria Barra const char *setupgeofname, *setuprhsfname, *applyfname, *errorfname; 173cb32e2e7SValeria Barra CeedEvalMode inmode, outmode; 174cb32e2e7SValeria Barra CeedQuadMode qmode; 175cb32e2e7SValeria Barra } bpData; 176cb32e2e7SValeria Barra 177cb32e2e7SValeria Barra bpData bpOptions[6] = { 178cb32e2e7SValeria Barra [CEED_BP1] = { 179cb32e2e7SValeria Barra .ncompu = 1, 180cb32e2e7SValeria Barra .qdatasize = 1, 181cb32e2e7SValeria Barra .qextra = 1, 182cb32e2e7SValeria Barra .setupgeo = SetupMassGeo, 183cb32e2e7SValeria Barra .setuprhs = SetupMassRhs, 184cb32e2e7SValeria Barra .apply = Mass, 185cb32e2e7SValeria Barra .error = Error, 186cb32e2e7SValeria Barra .setupgeofname = SetupMassGeo_loc, 187cb32e2e7SValeria Barra .setuprhsfname = SetupMassRhs_loc, 188cb32e2e7SValeria Barra .applyfname = Mass_loc, 189cb32e2e7SValeria Barra .errorfname = Error_loc, 190cb32e2e7SValeria Barra .inmode = CEED_EVAL_INTERP, 191cb32e2e7SValeria Barra .outmode = CEED_EVAL_INTERP, 192cb32e2e7SValeria Barra .qmode = CEED_GAUSS 193cb32e2e7SValeria Barra }, 194cb32e2e7SValeria Barra [CEED_BP2] = { 195cb32e2e7SValeria Barra .ncompu = 3, 196cb32e2e7SValeria Barra .qdatasize = 1, 197cb32e2e7SValeria Barra .qextra = 1, 198cb32e2e7SValeria Barra .setupgeo = SetupMassGeo, 199cb32e2e7SValeria Barra .setuprhs = SetupMassRhs3, 200cb32e2e7SValeria Barra .apply = Mass3, 201cb32e2e7SValeria Barra .error = Error3, 202cb32e2e7SValeria Barra .setupgeofname = SetupMassGeo_loc, 203cb32e2e7SValeria Barra .setuprhsfname = SetupMassRhs3_loc, 204cb32e2e7SValeria Barra .applyfname = Mass3_loc, 205cb32e2e7SValeria Barra .errorfname = Error3_loc, 206cb32e2e7SValeria Barra .inmode = CEED_EVAL_INTERP, 207cb32e2e7SValeria Barra .outmode = CEED_EVAL_INTERP, 208cb32e2e7SValeria Barra .qmode = CEED_GAUSS 209cb32e2e7SValeria Barra }, 210cb32e2e7SValeria Barra [CEED_BP3] = { 211cb32e2e7SValeria Barra .ncompu = 1, 212cb32e2e7SValeria Barra .qdatasize = 6, 213cb32e2e7SValeria Barra .qextra = 1, 214cb32e2e7SValeria Barra .setupgeo = SetupDiffGeo, 215cb32e2e7SValeria Barra .setuprhs = SetupDiffRhs, 216cb32e2e7SValeria Barra .apply = Diff, 217cb32e2e7SValeria Barra .error = Error, 218cb32e2e7SValeria Barra .setupgeofname = SetupDiffGeo_loc, 219cb32e2e7SValeria Barra .setuprhsfname = SetupDiffRhs_loc, 220cb32e2e7SValeria Barra .applyfname = Diff_loc, 221cb32e2e7SValeria Barra .errorfname = Error_loc, 222cb32e2e7SValeria Barra .inmode = CEED_EVAL_GRAD, 223cb32e2e7SValeria Barra .outmode = CEED_EVAL_GRAD, 224cb32e2e7SValeria Barra .qmode = CEED_GAUSS 225cb32e2e7SValeria Barra }, 226cb32e2e7SValeria Barra [CEED_BP4] = { 227cb32e2e7SValeria Barra .ncompu = 3, 228cb32e2e7SValeria Barra .qdatasize = 6, 229cb32e2e7SValeria Barra .qextra = 1, 230cb32e2e7SValeria Barra .setupgeo = SetupDiffGeo, 231cb32e2e7SValeria Barra .setuprhs = SetupDiffRhs3, 232cb32e2e7SValeria Barra .apply = Diff3, 233cb32e2e7SValeria Barra .error = Error3, 234cb32e2e7SValeria Barra .setupgeofname = SetupDiffGeo_loc, 235cb32e2e7SValeria Barra .setuprhsfname = SetupDiffRhs3_loc, 236cb32e2e7SValeria Barra .applyfname = Diff_loc, 237cb32e2e7SValeria Barra .errorfname = Error3_loc, 238cb32e2e7SValeria Barra .inmode = CEED_EVAL_GRAD, 239cb32e2e7SValeria Barra .outmode = CEED_EVAL_GRAD, 240cb32e2e7SValeria Barra .qmode = CEED_GAUSS 241cb32e2e7SValeria Barra }, 242cb32e2e7SValeria Barra [CEED_BP5] = { 243cb32e2e7SValeria Barra .ncompu = 1, 244cb32e2e7SValeria Barra .qdatasize = 6, 245cb32e2e7SValeria Barra .qextra = 0, 246cb32e2e7SValeria Barra .setupgeo = SetupDiffGeo, 247cb32e2e7SValeria Barra .setuprhs = SetupDiffRhs, 248cb32e2e7SValeria Barra .apply = Diff, 249cb32e2e7SValeria Barra .error = Error, 250cb32e2e7SValeria Barra .setupgeofname = SetupDiffGeo_loc, 251cb32e2e7SValeria Barra .setuprhsfname = SetupDiffRhs_loc, 252cb32e2e7SValeria Barra .applyfname = Diff_loc, 253cb32e2e7SValeria Barra .errorfname = Error_loc, 254cb32e2e7SValeria Barra .inmode = CEED_EVAL_GRAD, 255cb32e2e7SValeria Barra .outmode = CEED_EVAL_GRAD, 256cb32e2e7SValeria Barra .qmode = CEED_GAUSS_LOBATTO 257cb32e2e7SValeria Barra }, 258cb32e2e7SValeria Barra [CEED_BP6] = { 259cb32e2e7SValeria Barra .ncompu = 3, 260cb32e2e7SValeria Barra .qdatasize = 6, 261cb32e2e7SValeria Barra .qextra = 0, 262cb32e2e7SValeria Barra .setupgeo = SetupDiffGeo, 263cb32e2e7SValeria Barra .setuprhs = SetupDiffRhs3, 264cb32e2e7SValeria Barra .apply = Diff3, 265cb32e2e7SValeria Barra .error = Error3, 266cb32e2e7SValeria Barra .setupgeofname = SetupDiffGeo_loc, 267cb32e2e7SValeria Barra .setuprhsfname = SetupDiffRhs3_loc, 268cb32e2e7SValeria Barra .applyfname = Diff_loc, 269cb32e2e7SValeria Barra .errorfname = Error3_loc, 270cb32e2e7SValeria Barra .inmode = CEED_EVAL_GRAD, 271cb32e2e7SValeria Barra .outmode = CEED_EVAL_GRAD, 272cb32e2e7SValeria Barra .qmode = CEED_GAUSS_LOBATTO 273cb32e2e7SValeria Barra } 274cb32e2e7SValeria Barra }; 275cb32e2e7SValeria Barra 276cb32e2e7SValeria Barra // This function uses libCEED to compute the action of the mass matrix 277cb32e2e7SValeria Barra static PetscErrorCode MatMult_Mass(Mat A, Vec X, Vec Y) { 278cb32e2e7SValeria Barra PetscErrorCode ierr; 279cb32e2e7SValeria Barra User user; 280cb32e2e7SValeria Barra PetscScalar *x, *y; 281cb32e2e7SValeria Barra 282cb32e2e7SValeria Barra PetscFunctionBeginUser; 283*9396343dSjeremylt 284cb32e2e7SValeria Barra ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 285*9396343dSjeremylt 286*9396343dSjeremylt // Global-to-local 287cb32e2e7SValeria Barra ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 288cb32e2e7SValeria Barra SCATTER_REVERSE); CHKERRQ(ierr); 289*9396343dSjeremylt ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, 290*9396343dSjeremylt SCATTER_REVERSE); CHKERRQ(ierr); 291cb32e2e7SValeria Barra ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); 292cb32e2e7SValeria Barra 293*9396343dSjeremylt // Setup libCEED vectors 294*9396343dSjeremylt ierr = user->VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); 295*9396343dSjeremylt CHKERRQ(ierr); 296*9396343dSjeremylt ierr = user->VecGetArray(user->Yloc, &y); CHKERRQ(ierr); 297*9396343dSjeremylt CeedVectorSetArray(user->xceed, user->memtype, CEED_USE_POINTER, x); 298*9396343dSjeremylt CeedVectorSetArray(user->yceed, user->memtype, CEED_USE_POINTER, y); 299cb32e2e7SValeria Barra 300*9396343dSjeremylt // Apply libCEED operator 301cb32e2e7SValeria Barra CeedOperatorApply(user->op, user->xceed, user->yceed, 302cb32e2e7SValeria Barra CEED_REQUEST_IMMEDIATE); 303*9396343dSjeremylt CeedVectorSyncArray(user->yceed, user->memtype); 304cb32e2e7SValeria Barra 305*9396343dSjeremylt // Restore PETSc vectors 306*9396343dSjeremylt ierr = user->VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); 307*9396343dSjeremylt CHKERRQ(ierr); 308*9396343dSjeremylt ierr = user->VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); 309cb32e2e7SValeria Barra 310*9396343dSjeremylt // Local-to-global 311cb32e2e7SValeria Barra if (Y) { 312cb32e2e7SValeria Barra ierr = VecZeroEntries(Y); CHKERRQ(ierr); 313*9396343dSjeremylt ierr = VecScatterBegin(user->ltog, user->Yloc, Y, ADD_VALUES, 314*9396343dSjeremylt SCATTER_FORWARD); CHKERRQ(ierr); 315*9396343dSjeremylt ierr = VecScatterEnd(user->ltog, user->Yloc, Y, ADD_VALUES, 316*9396343dSjeremylt SCATTER_FORWARD); CHKERRQ(ierr); 317cb32e2e7SValeria Barra } 318cb32e2e7SValeria Barra PetscFunctionReturn(0); 319cb32e2e7SValeria Barra } 320cb32e2e7SValeria Barra 321cb32e2e7SValeria Barra // This function uses libCEED to compute the action of the Laplacian with 322cb32e2e7SValeria Barra // Dirichlet boundary conditions 323cb32e2e7SValeria Barra static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) { 324cb32e2e7SValeria Barra PetscErrorCode ierr; 325cb32e2e7SValeria Barra User user; 326cb32e2e7SValeria Barra PetscScalar *x, *y; 327cb32e2e7SValeria Barra 328cb32e2e7SValeria Barra PetscFunctionBeginUser; 329*9396343dSjeremylt 330cb32e2e7SValeria Barra ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 331cb32e2e7SValeria Barra 332cb32e2e7SValeria Barra // Global-to-local 333cb32e2e7SValeria Barra ierr = VecScatterBegin(user->ltog0, X, user->Xloc, INSERT_VALUES, 334cb32e2e7SValeria Barra SCATTER_REVERSE); CHKERRQ(ierr); 335cb32e2e7SValeria Barra ierr = VecScatterEnd(user->ltog0, X, user->Xloc, INSERT_VALUES, 336cb32e2e7SValeria Barra SCATTER_REVERSE); 337cb32e2e7SValeria Barra CHKERRQ(ierr); 338cb32e2e7SValeria Barra ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); 339cb32e2e7SValeria Barra 340*9396343dSjeremylt // Setup libCEED vectors 341*9396343dSjeremylt ierr = user->VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); 342*9396343dSjeremylt CHKERRQ(ierr); 343*9396343dSjeremylt ierr = user->VecGetArray(user->Yloc, &y); CHKERRQ(ierr); 344*9396343dSjeremylt CeedVectorSetArray(user->xceed, user->memtype, CEED_USE_POINTER, x); 345*9396343dSjeremylt CeedVectorSetArray(user->yceed, user->memtype, CEED_USE_POINTER, y); 346cb32e2e7SValeria Barra 347*9396343dSjeremylt // Apply libCEED operator 348cb32e2e7SValeria Barra CeedOperatorApply(user->op, user->xceed, user->yceed, 349cb32e2e7SValeria Barra CEED_REQUEST_IMMEDIATE); 350*9396343dSjeremylt CeedVectorSyncArray(user->yceed, user->memtype); 351cb32e2e7SValeria Barra 352cb32e2e7SValeria Barra // Restore PETSc vectors 353*9396343dSjeremylt ierr = user->VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); 354*9396343dSjeremylt CHKERRQ(ierr); 355*9396343dSjeremylt ierr = user->VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); 356cb32e2e7SValeria Barra 357cb32e2e7SValeria Barra // Local-to-global 358cb32e2e7SValeria Barra ierr = VecZeroEntries(Y); CHKERRQ(ierr); 359cb32e2e7SValeria Barra ierr = VecScatterBegin(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 360cb32e2e7SValeria Barra CHKERRQ(ierr); 361cb32e2e7SValeria Barra ierr = VecScatterEnd(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 362cb32e2e7SValeria Barra CHKERRQ(ierr); 363*9396343dSjeremylt ierr = VecScatterBegin(user->ltog0, user->Yloc, Y, ADD_VALUES, 364*9396343dSjeremylt SCATTER_FORWARD); CHKERRQ(ierr); 365cb32e2e7SValeria Barra ierr = VecScatterEnd(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 366cb32e2e7SValeria Barra CHKERRQ(ierr); 367cb32e2e7SValeria Barra 368cb32e2e7SValeria Barra PetscFunctionReturn(0); 369cb32e2e7SValeria Barra } 370cb32e2e7SValeria Barra 371cb32e2e7SValeria Barra // This function calculates the error in the final solution 372c70bd2a0Sjeremylt static PetscErrorCode ComputeErrorMax(User user, CeedOperator operror, Vec X, 373cb32e2e7SValeria Barra CeedVector target, PetscReal *maxerror) { 374cb32e2e7SValeria Barra PetscErrorCode ierr; 375cb32e2e7SValeria Barra PetscScalar *x; 376cb32e2e7SValeria Barra CeedVector collocated_error; 377cb32e2e7SValeria Barra CeedInt length; 378cb32e2e7SValeria Barra 379cb32e2e7SValeria Barra PetscFunctionBeginUser; 380*9396343dSjeremylt 381cb32e2e7SValeria Barra CeedVectorGetLength(target, &length); 382cb32e2e7SValeria Barra CeedVectorCreate(user->ceed, length, &collocated_error); 383cb32e2e7SValeria Barra 384cb32e2e7SValeria Barra // Global-to-local 385cb32e2e7SValeria Barra ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 386cb32e2e7SValeria Barra SCATTER_REVERSE); CHKERRQ(ierr); 387*9396343dSjeremylt ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, 388*9396343dSjeremylt SCATTER_REVERSE); CHKERRQ(ierr); 389*9396343dSjeremylt 390*9396343dSjeremylt // Setup libCEED vector 391*9396343dSjeremylt ierr = user->VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); 392cb32e2e7SValeria Barra CHKERRQ(ierr); 393*9396343dSjeremylt CeedVectorSetArray(user->xceed, user->memtype, CEED_USE_POINTER, x); 394cb32e2e7SValeria Barra 395*9396343dSjeremylt // Apply libCEED operator 396c70bd2a0Sjeremylt CeedOperatorApply(operror, user->xceed, collocated_error, 397cb32e2e7SValeria Barra CEED_REQUEST_IMMEDIATE); 398cb32e2e7SValeria Barra 399cb32e2e7SValeria Barra // Restore PETSc vector 400*9396343dSjeremylt ierr = user->VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); 401*9396343dSjeremylt CHKERRQ(ierr); 402cb32e2e7SValeria Barra 403cb32e2e7SValeria Barra // Reduce max error 404cb32e2e7SValeria Barra *maxerror = 0; 405cb32e2e7SValeria Barra const CeedScalar *e; 406cb32e2e7SValeria Barra CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e); 407cb32e2e7SValeria Barra for (CeedInt i=0; i<length; i++) { 408cb32e2e7SValeria Barra *maxerror = PetscMax(*maxerror, PetscAbsScalar(e[i])); 409cb32e2e7SValeria Barra } 410cb32e2e7SValeria Barra CeedVectorRestoreArrayRead(collocated_error, &e); 411*9396343dSjeremylt ierr = MPI_Allreduce(MPI_IN_PLACE, maxerror, 1, MPIU_REAL, MPIU_MAX, 412*9396343dSjeremylt user->comm); CHKERRQ(ierr); 413cb32e2e7SValeria Barra 414cb32e2e7SValeria Barra // Cleanup 415cb32e2e7SValeria Barra CeedVectorDestroy(&collocated_error); 416cb32e2e7SValeria Barra 417cb32e2e7SValeria Barra PetscFunctionReturn(0); 418cb32e2e7SValeria Barra } 419cb32e2e7SValeria Barra 420cb32e2e7SValeria Barra int main(int argc, char **argv) { 421cb32e2e7SValeria Barra PetscInt ierr; 422cb32e2e7SValeria Barra MPI_Comm comm; 423cb32e2e7SValeria Barra char ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self"; 424cb32e2e7SValeria Barra double my_rt_start, my_rt, rt_min, rt_max; 425cb32e2e7SValeria Barra PetscInt degree, qextra, localnodes, localelem, melem[3], mnodes[3], p[3], 426cb32e2e7SValeria Barra irank[3], lnodes[3], lsize, ncompu = 1; 427cb32e2e7SValeria Barra PetscScalar *r; 428cb32e2e7SValeria Barra PetscBool test_mode, benchmark_mode, write_solution; 429cb32e2e7SValeria Barra PetscMPIInt size, rank; 430cb32e2e7SValeria Barra Vec X, Xloc, rhs, rhsloc; 431cb32e2e7SValeria Barra Mat mat; 432cb32e2e7SValeria Barra KSP ksp; 433cb32e2e7SValeria Barra VecScatter ltog, ltog0, gtogD; 434cb32e2e7SValeria Barra User user; 435cb32e2e7SValeria Barra Ceed ceed; 436cb32e2e7SValeria Barra CeedBasis basisx, basisu; 43715910d16Sjeremylt CeedElemRestriction Erestrictx, Erestrictu, Erestrictui, Erestrictqdi; 438c70bd2a0Sjeremylt CeedQFunction qfsetupgeo, qfsetuprhs, qfapply, qferror; 439c70bd2a0Sjeremylt CeedOperator opsetupgeo, opsetuprhs, opapply, operror; 440cb32e2e7SValeria Barra CeedVector xcoord, qdata, rhsceed, target; 441*9396343dSjeremylt CeedMemType memtyperequested; 442cb32e2e7SValeria Barra CeedInt P, Q; 443cb32e2e7SValeria Barra const CeedInt dim = 3, ncompx = 3; 444199551f5Sjeremylt bpType bpchoice; 445cb32e2e7SValeria Barra 446*9396343dSjeremylt // Check for PETSc CUDA availability 447*9396343dSjeremylt PetscBool petschavecuda, setmemtyperequest = PETSC_FALSE; 448*9396343dSjeremylt // *INDENT-OFF* 449*9396343dSjeremylt #ifdef PETSC_HAVE_CUDA 450*9396343dSjeremylt petschavecuda = PETSC_TRUE; 451*9396343dSjeremylt #else 452*9396343dSjeremylt petschavecuda = PETSC_FALSE; 453*9396343dSjeremylt #endif 454*9396343dSjeremylt // *INDENT-ON* 455*9396343dSjeremylt 456cb32e2e7SValeria Barra ierr = PetscInitialize(&argc, &argv, NULL, help); 457cb32e2e7SValeria Barra if (ierr) return ierr; 458cb32e2e7SValeria Barra comm = PETSC_COMM_WORLD; 45932d2ee49SValeria Barra 46032d2ee49SValeria Barra // Read command line options 461cb32e2e7SValeria Barra ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr); 462199551f5Sjeremylt bpchoice = CEED_BP1; 463cb32e2e7SValeria Barra ierr = PetscOptionsEnum("-problem", 464cb32e2e7SValeria Barra "CEED benchmark problem to solve", NULL, 465199551f5Sjeremylt bpTypes, (PetscEnum)bpchoice, (PetscEnum *)&bpchoice, 466cb32e2e7SValeria Barra NULL); CHKERRQ(ierr); 467199551f5Sjeremylt ncompu = bpOptions[bpchoice].ncompu; 468cb32e2e7SValeria Barra test_mode = PETSC_FALSE; 469cb32e2e7SValeria Barra ierr = PetscOptionsBool("-test", 470cb32e2e7SValeria Barra "Testing mode (do not print unless error is large)", 471cb32e2e7SValeria Barra NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); 472cb32e2e7SValeria Barra benchmark_mode = PETSC_FALSE; 473cb32e2e7SValeria Barra ierr = PetscOptionsBool("-benchmark", 474cb32e2e7SValeria Barra "Benchmarking mode (prints benchmark statistics)", 475cb32e2e7SValeria Barra NULL, benchmark_mode, &benchmark_mode, NULL); 476cb32e2e7SValeria Barra CHKERRQ(ierr); 477cb32e2e7SValeria Barra write_solution = PETSC_FALSE; 478cb32e2e7SValeria Barra ierr = PetscOptionsBool("-write_solution", 479cb32e2e7SValeria Barra "Write solution for visualization", 480cb32e2e7SValeria Barra NULL, write_solution, &write_solution, NULL); 481cb32e2e7SValeria Barra CHKERRQ(ierr); 482cb32e2e7SValeria Barra degree = test_mode ? 3 : 1; 483cb32e2e7SValeria Barra ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", 484cb32e2e7SValeria Barra NULL, degree, °ree, NULL); CHKERRQ(ierr); 485199551f5Sjeremylt qextra = bpOptions[bpchoice].qextra; 486cb32e2e7SValeria Barra ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points", 487cb32e2e7SValeria Barra NULL, qextra, &qextra, NULL); CHKERRQ(ierr); 488cb32e2e7SValeria Barra ierr = PetscOptionsString("-ceed", "CEED resource specifier", 489cb32e2e7SValeria Barra NULL, ceedresource, ceedresource, 490cb32e2e7SValeria Barra sizeof(ceedresource), NULL); CHKERRQ(ierr); 491cb32e2e7SValeria Barra localnodes = 1000; 492cb32e2e7SValeria Barra ierr = PetscOptionsInt("-local", 493cb32e2e7SValeria Barra "Target number of locally owned nodes per process", 494cb32e2e7SValeria Barra NULL, localnodes, &localnodes, NULL); CHKERRQ(ierr); 495*9396343dSjeremylt memtyperequested = petschavecuda ? CEED_MEM_DEVICE : CEED_MEM_HOST; 496*9396343dSjeremylt ierr = PetscOptionsEnum("-memtype", 497*9396343dSjeremylt "CEED MemType requested", NULL, 498*9396343dSjeremylt memTypes, (PetscEnum)memtyperequested, 499*9396343dSjeremylt (PetscEnum *)&memtyperequested, &setmemtyperequest); 500*9396343dSjeremylt CHKERRQ(ierr); 501cb32e2e7SValeria Barra ierr = PetscOptionsEnd(); CHKERRQ(ierr); 502cb32e2e7SValeria Barra P = degree + 1; 503cb32e2e7SValeria Barra Q = P + qextra; 504cb32e2e7SValeria Barra 505*9396343dSjeremylt // Set up libCEED 506*9396343dSjeremylt CeedInit(ceedresource, &ceed); 507*9396343dSjeremylt CeedMemType memtypebackend; 508*9396343dSjeremylt CeedGetPreferredMemType(ceed, &memtypebackend); 509*9396343dSjeremylt 510*9396343dSjeremylt // Check memtype compatibility 511*9396343dSjeremylt if (!setmemtyperequest) 512*9396343dSjeremylt memtyperequested = memtypebackend; 513*9396343dSjeremylt else if (!petschavecuda && memtyperequested == CEED_MEM_DEVICE) 514*9396343dSjeremylt SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_SUP_SYS, 515*9396343dSjeremylt "PETSc was not built with CUDA. " 516*9396343dSjeremylt "Requested MemType CEED_MEM_DEVICE is not supported.", NULL); 517*9396343dSjeremylt 518cb32e2e7SValeria Barra // Determine size of process grid 519cb32e2e7SValeria Barra ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr); 520cb32e2e7SValeria Barra Split3(size, p, false); 521cb32e2e7SValeria Barra 522cb32e2e7SValeria Barra // Find a nicely composite number of elements no less than localnodes 523cb32e2e7SValeria Barra for (localelem = PetscMax(1, localnodes / (degree*degree*degree)); ; 524cb32e2e7SValeria Barra localelem++) { 525cb32e2e7SValeria Barra Split3(localelem, melem, true); 526cb32e2e7SValeria Barra if (Max3(melem) / Min3(melem) <= 2) break; 527cb32e2e7SValeria Barra } 528cb32e2e7SValeria Barra 529cb32e2e7SValeria Barra // Find my location in the process grid 530cb32e2e7SValeria Barra ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr); 531cb32e2e7SValeria Barra for (int d=0,rankleft=rank; d<dim; d++) { 532cb32e2e7SValeria Barra const int pstride[3] = {p[1] *p[2], p[2], 1}; 533cb32e2e7SValeria Barra irank[d] = rankleft / pstride[d]; 534cb32e2e7SValeria Barra rankleft -= irank[d] * pstride[d]; 535cb32e2e7SValeria Barra } 536cb32e2e7SValeria Barra 537cb32e2e7SValeria Barra GlobalNodes(p, irank, degree, melem, mnodes); 538cb32e2e7SValeria Barra 539cb32e2e7SValeria Barra // Setup global vector 540*9396343dSjeremylt if (memtyperequested == CEED_MEM_DEVICE) { 541*9396343dSjeremylt ierr = VecSetType(X, VECCUDA); CHKERRQ(ierr); 542*9396343dSjeremylt } 543cb32e2e7SValeria Barra ierr = VecCreate(comm, &X); CHKERRQ(ierr); 544cb32e2e7SValeria Barra ierr = VecSetSizes(X, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, PETSC_DECIDE); 545cb32e2e7SValeria Barra CHKERRQ(ierr); 546cb32e2e7SValeria Barra ierr = VecSetUp(X); CHKERRQ(ierr); 547cb32e2e7SValeria Barra 548cb32e2e7SValeria Barra // Set up libCEED 549cb32e2e7SValeria Barra CeedInit(ceedresource, &ceed); 550cb32e2e7SValeria Barra 551cb32e2e7SValeria Barra // Print summary 552cb32e2e7SValeria Barra CeedInt gsize; 553cb32e2e7SValeria Barra ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 554dc7d240cSValeria Barra if (!test_mode) { 555cb32e2e7SValeria Barra const char *usedresource; 556cb32e2e7SValeria Barra CeedGetResource(ceed, &usedresource); 557*9396343dSjeremylt 558*9396343dSjeremylt VecType vectype; 559*9396343dSjeremylt ierr = VecGetType(X, &vectype); CHKERRQ(ierr); 560*9396343dSjeremylt 561cb32e2e7SValeria Barra ierr = PetscPrintf(comm, 562cb32e2e7SValeria Barra "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n" 563*9396343dSjeremylt " PETSc:\n" 564*9396343dSjeremylt " PETSc Vec Type : %s\n" 565cb32e2e7SValeria Barra " libCEED:\n" 566cb32e2e7SValeria Barra " libCEED Backend : %s\n" 567*9396343dSjeremylt " libCEED Backend MemType : %s\n" 568*9396343dSjeremylt " libCEED User Requested MemType : %s\n" 569cb32e2e7SValeria Barra " Mesh:\n" 570cb32e2e7SValeria Barra " Number of 1D Basis Nodes (p) : %d\n" 571cb32e2e7SValeria Barra " Number of 1D Quadrature Points (q) : %d\n" 572cb32e2e7SValeria Barra " Global nodes : %D\n" 573cb32e2e7SValeria Barra " Process Decomposition : %D %D %D\n" 574cb32e2e7SValeria Barra " Local Elements : %D = %D %D %D\n" 575db419314Sjeremylt " Owned nodes : %D = %D %D %D\n" 576db419314Sjeremylt " DoF per node : %D\n", 577*9396343dSjeremylt bpchoice+1, vectype, usedresource, 578*9396343dSjeremylt CeedMemTypes[memtypebackend], 579*9396343dSjeremylt (setmemtyperequest) ? 580*9396343dSjeremylt CeedMemTypes[memtyperequested] : "none", 581*9396343dSjeremylt P, Q, gsize/ncompu, p[0], p[1], p[2], localelem, 582*9396343dSjeremylt melem[0], melem[1], melem[2], 583cb32e2e7SValeria Barra mnodes[0]*mnodes[1]*mnodes[2], mnodes[0], mnodes[1], 584db419314Sjeremylt mnodes[2], ncompu); CHKERRQ(ierr); 585cb32e2e7SValeria Barra } 586cb32e2e7SValeria Barra 587cb32e2e7SValeria Barra { 588cb32e2e7SValeria Barra lsize = 1; 589cb32e2e7SValeria Barra for (int d=0; d<dim; d++) { 590cb32e2e7SValeria Barra lnodes[d] = melem[d]*degree + 1; 591cb32e2e7SValeria Barra lsize *= lnodes[d]; 592cb32e2e7SValeria Barra } 593*9396343dSjeremylt if (memtyperequested == CEED_MEM_DEVICE) { 594*9396343dSjeremylt ierr = VecSetType(Xloc, VECCUDA); CHKERRQ(ierr); 595*9396343dSjeremylt } 596cb32e2e7SValeria Barra ierr = VecCreate(PETSC_COMM_SELF, &Xloc); CHKERRQ(ierr); 597cb32e2e7SValeria Barra ierr = VecSetSizes(Xloc, lsize*ncompu, PETSC_DECIDE); CHKERRQ(ierr); 598cb32e2e7SValeria Barra ierr = VecSetUp(Xloc); CHKERRQ(ierr); 599cb32e2e7SValeria Barra 600cb32e2e7SValeria Barra // Create local-to-global scatter 601cb32e2e7SValeria Barra PetscInt *ltogind, *ltogind0, *locind, l0count; 602cb32e2e7SValeria Barra IS ltogis, ltogis0, locis; 603cb32e2e7SValeria Barra PetscInt gstart[2][2][2], gmnodes[2][2][2][dim]; 604cb32e2e7SValeria Barra 605cb32e2e7SValeria Barra for (int i=0; i<2; i++) { 606cb32e2e7SValeria Barra for (int j=0; j<2; j++) { 607cb32e2e7SValeria Barra for (int k=0; k<2; k++) { 608cb32e2e7SValeria Barra PetscInt ijkrank[3] = {irank[0]+i, irank[1]+j, irank[2]+k}; 609cb32e2e7SValeria Barra gstart[i][j][k] = GlobalStart(p, ijkrank, degree, melem); 610cb32e2e7SValeria Barra GlobalNodes(p, ijkrank, degree, melem, gmnodes[i][j][k]); 611cb32e2e7SValeria Barra } 612cb32e2e7SValeria Barra } 613cb32e2e7SValeria Barra } 614cb32e2e7SValeria Barra 615cb32e2e7SValeria Barra ierr = PetscMalloc1(lsize, <ogind); CHKERRQ(ierr); 616cb32e2e7SValeria Barra ierr = PetscMalloc1(lsize, <ogind0); CHKERRQ(ierr); 617cb32e2e7SValeria Barra ierr = PetscMalloc1(lsize, &locind); CHKERRQ(ierr); 618cb32e2e7SValeria Barra l0count = 0; 619cb32e2e7SValeria Barra for (PetscInt i=0,ir,ii; ir=i>=mnodes[0], ii=i-ir*mnodes[0], i<lnodes[0]; i++) 620cb32e2e7SValeria Barra for (PetscInt j=0,jr,jj; jr=j>=mnodes[1], jj=j-jr*mnodes[1], j<lnodes[1]; j++) 621cb32e2e7SValeria Barra for (PetscInt k=0,kr,kk; kr=k>=mnodes[2], kk=k-kr*mnodes[2], k<lnodes[2]; k++) { 622cb32e2e7SValeria Barra PetscInt here = (i*lnodes[1]+j)*lnodes[2]+k; 623cb32e2e7SValeria Barra ltogind[here] = 624cb32e2e7SValeria Barra gstart[ir][jr][kr] + (ii*gmnodes[ir][jr][kr][1]+jj)*gmnodes[ir][jr][kr][2]+kk; 625cb32e2e7SValeria Barra if ((irank[0] == 0 && i == 0) 626cb32e2e7SValeria Barra || (irank[1] == 0 && j == 0) 627cb32e2e7SValeria Barra || (irank[2] == 0 && k == 0) 628cb32e2e7SValeria Barra || (irank[0]+1 == p[0] && i+1 == lnodes[0]) 629cb32e2e7SValeria Barra || (irank[1]+1 == p[1] && j+1 == lnodes[1]) 630cb32e2e7SValeria Barra || (irank[2]+1 == p[2] && k+1 == lnodes[2])) 631cb32e2e7SValeria Barra continue; 632cb32e2e7SValeria Barra ltogind0[l0count] = ltogind[here]; 633cb32e2e7SValeria Barra locind[l0count++] = here; 634cb32e2e7SValeria Barra } 635cb32e2e7SValeria Barra ierr = ISCreateBlock(comm, ncompu, lsize, ltogind, PETSC_OWN_POINTER, 636cb32e2e7SValeria Barra <ogis); CHKERRQ(ierr); 637cb32e2e7SValeria Barra ierr = VecScatterCreate(Xloc, NULL, X, ltogis, <og); CHKERRQ(ierr); 638cb32e2e7SValeria Barra CHKERRQ(ierr); 639cb32e2e7SValeria Barra ierr = ISCreateBlock(comm, ncompu, l0count, ltogind0, PETSC_OWN_POINTER, 640cb32e2e7SValeria Barra <ogis0); CHKERRQ(ierr); 641cb32e2e7SValeria Barra ierr = ISCreateBlock(comm, ncompu, l0count, locind, PETSC_OWN_POINTER, 642cb32e2e7SValeria Barra &locis); CHKERRQ(ierr); 643cb32e2e7SValeria Barra ierr = VecScatterCreate(Xloc, locis, X, ltogis0, <og0); CHKERRQ(ierr); 644cb32e2e7SValeria Barra { 645cb32e2e7SValeria Barra // Create global-to-global scatter for Dirichlet values (everything not in 646cb32e2e7SValeria Barra // ltogis0, which is the range of ltog0) 647cb32e2e7SValeria Barra PetscInt xstart, xend, *indD, countD = 0; 648cb32e2e7SValeria Barra IS isD; 649cb32e2e7SValeria Barra const PetscScalar *x; 650cb32e2e7SValeria Barra ierr = VecZeroEntries(Xloc); CHKERRQ(ierr); 651cb32e2e7SValeria Barra ierr = VecSet(X, 1.0); CHKERRQ(ierr); 652cb32e2e7SValeria Barra ierr = VecScatterBegin(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 653cb32e2e7SValeria Barra CHKERRQ(ierr); 654cb32e2e7SValeria Barra ierr = VecScatterEnd(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 655cb32e2e7SValeria Barra CHKERRQ(ierr); 656cb32e2e7SValeria Barra ierr = VecGetOwnershipRange(X, &xstart, &xend); CHKERRQ(ierr); 657cb32e2e7SValeria Barra ierr = PetscMalloc1(xend-xstart, &indD); CHKERRQ(ierr); 658cb32e2e7SValeria Barra ierr = VecGetArrayRead(X, &x); CHKERRQ(ierr); 659cb32e2e7SValeria Barra for (PetscInt i=0; i<xend-xstart; i++) { 660cb32e2e7SValeria Barra if (x[i] == 1.) indD[countD++] = xstart + i; 661cb32e2e7SValeria Barra } 662cb32e2e7SValeria Barra ierr = VecRestoreArrayRead(X, &x); CHKERRQ(ierr); 663cb32e2e7SValeria Barra ierr = ISCreateGeneral(comm, countD, indD, PETSC_COPY_VALUES, &isD); 664cb32e2e7SValeria Barra CHKERRQ(ierr); 665cb32e2e7SValeria Barra ierr = PetscFree(indD); CHKERRQ(ierr); 666cb32e2e7SValeria Barra ierr = VecScatterCreate(X, isD, X, isD, >ogD); CHKERRQ(ierr); 667cb32e2e7SValeria Barra ierr = ISDestroy(&isD); CHKERRQ(ierr); 668cb32e2e7SValeria Barra } 669cb32e2e7SValeria Barra ierr = ISDestroy(<ogis); CHKERRQ(ierr); 670cb32e2e7SValeria Barra ierr = ISDestroy(<ogis0); CHKERRQ(ierr); 671cb32e2e7SValeria Barra ierr = ISDestroy(&locis); CHKERRQ(ierr); 672cb32e2e7SValeria Barra } 673cb32e2e7SValeria Barra 674cb32e2e7SValeria Barra // CEED bases 675cb32e2e7SValeria Barra CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompu, P, Q, 676199551f5Sjeremylt bpOptions[bpchoice].qmode, &basisu); 677cb32e2e7SValeria Barra CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompx, 2, Q, 678199551f5Sjeremylt bpOptions[bpchoice].qmode, &basisx); 679cb32e2e7SValeria Barra 680cb32e2e7SValeria Barra // CEED restrictions 68174312f09Sjeremylt CreateRestriction(ceed, CEED_INTERLACED, melem, P, ncompu, &Erestrictu); 68274312f09Sjeremylt CreateRestriction(ceed, CEED_NONINTERLACED, melem, 2, dim, &Erestrictx); 683cb32e2e7SValeria Barra CeedInt nelem = melem[0]*melem[1]*melem[2]; 6847509a596Sjeremylt CeedElemRestrictionCreateStrided(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, ncompu, 685523b8ea0Sjeremylt CEED_STRIDES_BACKEND, &Erestrictui); 6867509a596Sjeremylt CeedElemRestrictionCreateStrided(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, 687199551f5Sjeremylt bpOptions[bpchoice].qdatasize, 688523b8ea0Sjeremylt CEED_STRIDES_BACKEND, &Erestrictqdi); 689cb32e2e7SValeria Barra { 690cb32e2e7SValeria Barra CeedScalar *xloc; 691cb32e2e7SValeria Barra CeedInt shape[3] = {melem[0]+1, melem[1]+1, melem[2]+1}, len = 692cb32e2e7SValeria Barra shape[0]*shape[1]*shape[2]; 693cb32e2e7SValeria Barra xloc = malloc(len*ncompx*sizeof xloc[0]); 694cb32e2e7SValeria Barra for (CeedInt i=0; i<shape[0]; i++) { 695cb32e2e7SValeria Barra for (CeedInt j=0; j<shape[1]; j++) { 696cb32e2e7SValeria Barra for (CeedInt k=0; k<shape[2]; k++) { 697cb32e2e7SValeria Barra xloc[((i*shape[1]+j)*shape[2]+k) + 0*len] = 1.*(irank[0]*melem[0]+i) / 698cb32e2e7SValeria Barra (p[0]*melem[0]); 699cb32e2e7SValeria Barra xloc[((i*shape[1]+j)*shape[2]+k) + 1*len] = 1.*(irank[1]*melem[1]+j) / 700cb32e2e7SValeria Barra (p[1]*melem[1]); 701cb32e2e7SValeria Barra xloc[((i*shape[1]+j)*shape[2]+k) + 2*len] = 1.*(irank[2]*melem[2]+k) / 702cb32e2e7SValeria Barra (p[2]*melem[2]); 703cb32e2e7SValeria Barra } 704cb32e2e7SValeria Barra } 705cb32e2e7SValeria Barra } 706cb32e2e7SValeria Barra CeedVectorCreate(ceed, len*ncompx, &xcoord); 707cb32e2e7SValeria Barra CeedVectorSetArray(xcoord, CEED_MEM_HOST, CEED_OWN_POINTER, xloc); 708cb32e2e7SValeria Barra } 709cb32e2e7SValeria Barra 710cb32e2e7SValeria Barra // Create the Qfunction that builds the operator quadrature data 711199551f5Sjeremylt CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].setupgeo, 712199551f5Sjeremylt bpOptions[bpchoice].setupgeofname, &qfsetupgeo); 713c70bd2a0Sjeremylt CeedQFunctionAddInput(qfsetupgeo, "dx", ncompx*dim, CEED_EVAL_GRAD); 714c70bd2a0Sjeremylt CeedQFunctionAddInput(qfsetupgeo, "weight", 1, CEED_EVAL_WEIGHT); 715199551f5Sjeremylt CeedQFunctionAddOutput(qfsetupgeo, "qdata", bpOptions[bpchoice].qdatasize, 716cb32e2e7SValeria Barra CEED_EVAL_NONE); 717cb32e2e7SValeria Barra 718cb32e2e7SValeria Barra // Create the Qfunction that sets up the RHS and true solution 719199551f5Sjeremylt CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].setuprhs, 720199551f5Sjeremylt bpOptions[bpchoice].setuprhsfname, &qfsetuprhs); 721c70bd2a0Sjeremylt CeedQFunctionAddInput(qfsetuprhs, "x", ncompx, CEED_EVAL_INTERP); 722c70bd2a0Sjeremylt CeedQFunctionAddInput(qfsetuprhs, "dx", ncompx*dim, CEED_EVAL_GRAD); 723c70bd2a0Sjeremylt CeedQFunctionAddInput(qfsetuprhs, "weight", 1, CEED_EVAL_WEIGHT); 724c70bd2a0Sjeremylt CeedQFunctionAddOutput(qfsetuprhs, "true_soln", ncompu, CEED_EVAL_NONE); 725c70bd2a0Sjeremylt CeedQFunctionAddOutput(qfsetuprhs, "rhs", ncompu, CEED_EVAL_INTERP); 726cb32e2e7SValeria Barra 727cb32e2e7SValeria Barra // Set up PDE operator 728199551f5Sjeremylt CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].apply, 729199551f5Sjeremylt bpOptions[bpchoice].applyfname, &qfapply); 730cb32e2e7SValeria Barra // Add inputs and outputs 731199551f5Sjeremylt CeedInt inscale = bpOptions[bpchoice].inmode==CEED_EVAL_GRAD ? 3 : 1; 732199551f5Sjeremylt CeedInt outscale = bpOptions[bpchoice].outmode==CEED_EVAL_GRAD ? 3 : 1; 733c70bd2a0Sjeremylt CeedQFunctionAddInput(qfapply, "u", ncompu*inscale, 734199551f5Sjeremylt bpOptions[bpchoice].inmode); 735199551f5Sjeremylt CeedQFunctionAddInput(qfapply, "qdata", bpOptions[bpchoice].qdatasize, 736cb32e2e7SValeria Barra CEED_EVAL_NONE); 737c70bd2a0Sjeremylt CeedQFunctionAddOutput(qfapply, "v", ncompu*outscale, 738199551f5Sjeremylt bpOptions[bpchoice].outmode); 739cb32e2e7SValeria Barra 740cb32e2e7SValeria Barra // Create the error qfunction 741199551f5Sjeremylt CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].error, 742199551f5Sjeremylt bpOptions[bpchoice].errorfname, &qferror); 743c70bd2a0Sjeremylt CeedQFunctionAddInput(qferror, "u", ncompu, CEED_EVAL_INTERP); 744c70bd2a0Sjeremylt CeedQFunctionAddInput(qferror, "true_soln", ncompu, CEED_EVAL_NONE); 745c70bd2a0Sjeremylt CeedQFunctionAddOutput(qferror, "error", ncompu, CEED_EVAL_NONE); 746cb32e2e7SValeria Barra 747cb32e2e7SValeria Barra // Create the persistent vectors that will be needed in setup 748cb32e2e7SValeria Barra CeedInt nqpts; 749cb32e2e7SValeria Barra CeedBasisGetNumQuadraturePoints(basisu, &nqpts); 750199551f5Sjeremylt CeedVectorCreate(ceed, bpOptions[bpchoice].qdatasize*nelem*nqpts, &qdata); 751cb32e2e7SValeria Barra CeedVectorCreate(ceed, nelem*nqpts*ncompu, &target); 752cb32e2e7SValeria Barra CeedVectorCreate(ceed, lsize*ncompu, &rhsceed); 753cb32e2e7SValeria Barra 754cb32e2e7SValeria Barra // Create the operator that builds the quadrature data for the ceed operator 755c70bd2a0Sjeremylt CeedOperatorCreate(ceed, qfsetupgeo, CEED_QFUNCTION_NONE, 756c70bd2a0Sjeremylt CEED_QFUNCTION_NONE, &opsetupgeo); 757c70bd2a0Sjeremylt CeedOperatorSetField(opsetupgeo, "dx", Erestrictx, basisx, 758a8d32208Sjeremylt CEED_VECTOR_ACTIVE); 759c70bd2a0Sjeremylt CeedOperatorSetField(opsetupgeo, "weight", CEED_ELEMRESTRICTION_NONE, basisx, 760a8d32208Sjeremylt CEED_VECTOR_NONE); 761c70bd2a0Sjeremylt CeedOperatorSetField(opsetupgeo, "qdata", Erestrictqdi, 762cb32e2e7SValeria Barra CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 763cb32e2e7SValeria Barra 764cb32e2e7SValeria Barra // Create the operator that builds the RHS and true solution 765c70bd2a0Sjeremylt CeedOperatorCreate(ceed, qfsetuprhs, CEED_QFUNCTION_NONE, 766c70bd2a0Sjeremylt CEED_QFUNCTION_NONE, &opsetuprhs); 767c70bd2a0Sjeremylt CeedOperatorSetField(opsetuprhs, "x", Erestrictx, basisx, 768a8d32208Sjeremylt CEED_VECTOR_ACTIVE); 769c70bd2a0Sjeremylt CeedOperatorSetField(opsetuprhs, "dx", Erestrictx, basisx, 770a8d32208Sjeremylt CEED_VECTOR_ACTIVE); 771c70bd2a0Sjeremylt CeedOperatorSetField(opsetuprhs, "weight", CEED_ELEMRESTRICTION_NONE, basisx, 772a8d32208Sjeremylt CEED_VECTOR_NONE); 773c70bd2a0Sjeremylt CeedOperatorSetField(opsetuprhs, "true_soln", Erestrictui, 774cb32e2e7SValeria Barra CEED_BASIS_COLLOCATED, target); 775c70bd2a0Sjeremylt CeedOperatorSetField(opsetuprhs, "rhs", Erestrictu, basisu, 776a8d32208Sjeremylt CEED_VECTOR_ACTIVE); 777cb32e2e7SValeria Barra 778cb32e2e7SValeria Barra // Create the mass or diff operator 779c70bd2a0Sjeremylt CeedOperatorCreate(ceed, qfapply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, 780c70bd2a0Sjeremylt &opapply); 781c70bd2a0Sjeremylt CeedOperatorSetField(opapply, "u", Erestrictu, basisu, CEED_VECTOR_ACTIVE); 782c70bd2a0Sjeremylt CeedOperatorSetField(opapply, "qdata", Erestrictqdi, CEED_BASIS_COLLOCATED, 783a8d32208Sjeremylt qdata); 784c70bd2a0Sjeremylt CeedOperatorSetField(opapply, "v", Erestrictu, basisu, CEED_VECTOR_ACTIVE); 785cb32e2e7SValeria Barra 786cb32e2e7SValeria Barra // Create the error operator 787c70bd2a0Sjeremylt CeedOperatorCreate(ceed, qferror, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, 788c70bd2a0Sjeremylt &operror); 789c70bd2a0Sjeremylt CeedOperatorSetField(operror, "u", Erestrictu, basisu, CEED_VECTOR_ACTIVE); 790c70bd2a0Sjeremylt CeedOperatorSetField(operror, "true_soln", Erestrictui, 791cb32e2e7SValeria Barra CEED_BASIS_COLLOCATED, target); 792c70bd2a0Sjeremylt CeedOperatorSetField(operror, "error", Erestrictui, CEED_BASIS_COLLOCATED, 793a8d32208Sjeremylt CEED_VECTOR_ACTIVE); 794cb32e2e7SValeria Barra 795cb32e2e7SValeria Barra // Set up Mat 796cb32e2e7SValeria Barra ierr = PetscMalloc1(1, &user); CHKERRQ(ierr); 797cb32e2e7SValeria Barra user->comm = comm; 798cb32e2e7SValeria Barra user->ltog = ltog; 799199551f5Sjeremylt if (bpchoice != CEED_BP1 && bpchoice != CEED_BP2) { 800cb32e2e7SValeria Barra user->ltog0 = ltog0; 801cb32e2e7SValeria Barra user->gtogD = gtogD; 802cb32e2e7SValeria Barra } 803cb32e2e7SValeria Barra user->Xloc = Xloc; 804cb32e2e7SValeria Barra ierr = VecDuplicate(Xloc, &user->Yloc); CHKERRQ(ierr); 805cb32e2e7SValeria Barra CeedVectorCreate(ceed, lsize*ncompu, &user->xceed); 806cb32e2e7SValeria Barra CeedVectorCreate(ceed, lsize*ncompu, &user->yceed); 807c70bd2a0Sjeremylt user->op = opapply; 808cb32e2e7SValeria Barra user->qdata = qdata; 809cb32e2e7SValeria Barra user->ceed = ceed; 810*9396343dSjeremylt user->memtype = memtyperequested; 811*9396343dSjeremylt if (memtyperequested == CEED_MEM_HOST) { 812*9396343dSjeremylt user->VecGetArray = VecGetArray; 813*9396343dSjeremylt user->VecGetArrayRead = VecGetArrayRead; 814*9396343dSjeremylt user->VecRestoreArray = VecRestoreArray; 815*9396343dSjeremylt user->VecRestoreArrayRead = VecRestoreArrayRead; 816*9396343dSjeremylt } else { 817*9396343dSjeremylt user->VecGetArray = VecCUDAGetArray; 818*9396343dSjeremylt user->VecGetArrayRead = VecCUDAGetArrayRead; 819*9396343dSjeremylt user->VecRestoreArray = VecCUDARestoreArray; 820*9396343dSjeremylt user->VecRestoreArrayRead = VecCUDARestoreArrayRead; 821*9396343dSjeremylt } 822cb32e2e7SValeria Barra 823cb32e2e7SValeria Barra ierr = MatCreateShell(comm, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, 824cb32e2e7SValeria Barra mnodes[0]*mnodes[1]*mnodes[2]*ncompu, 825cb32e2e7SValeria Barra PETSC_DECIDE, PETSC_DECIDE, user, &mat); CHKERRQ(ierr); 826199551f5Sjeremylt if (bpchoice == CEED_BP1 || bpchoice == CEED_BP2) { 827cb32e2e7SValeria Barra ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Mass); 828cb32e2e7SValeria Barra CHKERRQ(ierr); 829cb32e2e7SValeria Barra } else { 830cb32e2e7SValeria Barra ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Diff); 831cb32e2e7SValeria Barra CHKERRQ(ierr); 832cb32e2e7SValeria Barra } 833*9396343dSjeremylt if (user->memtype == CEED_MEM_DEVICE) { 834*9396343dSjeremylt ierr = MatShellSetVecType(mat, VECCUDA); CHKERRQ(ierr); 835*9396343dSjeremylt } 836cb32e2e7SValeria Barra 837cb32e2e7SValeria Barra // Get RHS vector 838*9396343dSjeremylt ierr = VecDuplicate(X, &rhs); CHKERRQ(ierr); 839cb32e2e7SValeria Barra ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr); 840cb32e2e7SValeria Barra ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr); 841*9396343dSjeremylt ierr = user->VecGetArray(rhsloc, &r); CHKERRQ(ierr); 842*9396343dSjeremylt CeedVectorSetArray(rhsceed, user->memtype, CEED_USE_POINTER, r); 843cb32e2e7SValeria Barra 844cb32e2e7SValeria Barra // Setup qdata, rhs, and target 845c70bd2a0Sjeremylt CeedOperatorApply(opsetupgeo, xcoord, qdata, CEED_REQUEST_IMMEDIATE); 846c70bd2a0Sjeremylt CeedOperatorApply(opsetuprhs, xcoord, rhsceed, CEED_REQUEST_IMMEDIATE); 847*9396343dSjeremylt ierr = CeedVectorSyncArray(rhsceed, user->memtype); CHKERRQ(ierr); 848cb32e2e7SValeria Barra CeedVectorDestroy(&xcoord); 849cb32e2e7SValeria Barra 850cb32e2e7SValeria Barra // Gather RHS 851*9396343dSjeremylt ierr = user->VecRestoreArray(rhsloc, &r); CHKERRQ(ierr); 852cb32e2e7SValeria Barra ierr = VecZeroEntries(rhs); CHKERRQ(ierr); 853cb32e2e7SValeria Barra ierr = VecScatterBegin(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 854cb32e2e7SValeria Barra CHKERRQ(ierr); 855cb32e2e7SValeria Barra ierr = VecScatterEnd(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 856cb32e2e7SValeria Barra CHKERRQ(ierr); 857cb32e2e7SValeria Barra CeedVectorDestroy(&rhsceed); 858cb32e2e7SValeria Barra 859cb32e2e7SValeria Barra ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr); 860cb32e2e7SValeria Barra { 861cb32e2e7SValeria Barra PC pc; 862cb32e2e7SValeria Barra ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 863199551f5Sjeremylt if (bpchoice == CEED_BP1 || bpchoice == CEED_BP2) { 864cb32e2e7SValeria Barra ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 865cb32e2e7SValeria Barra ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr); 866cb32e2e7SValeria Barra } else { 867cb32e2e7SValeria Barra ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr); 868cb32e2e7SValeria Barra } 869cb32e2e7SValeria Barra ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 870cb32e2e7SValeria Barra ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 871cb32e2e7SValeria Barra ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 872cb32e2e7SValeria Barra PETSC_DEFAULT); CHKERRQ(ierr); 873cb32e2e7SValeria Barra } 874cb32e2e7SValeria Barra ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); 875cb32e2e7SValeria Barra ierr = KSPSetOperators(ksp, mat, mat); CHKERRQ(ierr); 876cb32e2e7SValeria Barra // First run, if benchmarking 877cb32e2e7SValeria Barra if (benchmark_mode) { 878cb32e2e7SValeria Barra ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); 879cb32e2e7SValeria Barra CHKERRQ(ierr); 880cb32e2e7SValeria Barra my_rt_start = MPI_Wtime(); 881cb32e2e7SValeria Barra ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 882cb32e2e7SValeria Barra my_rt = MPI_Wtime() - my_rt_start; 883cb32e2e7SValeria Barra ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm); 884cb32e2e7SValeria Barra CHKERRQ(ierr); 885cb32e2e7SValeria Barra // Set maxits based on first iteration timing 886cb32e2e7SValeria Barra if (my_rt > 0.02) { 887cb32e2e7SValeria Barra ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5); 888cb32e2e7SValeria Barra CHKERRQ(ierr); 889cb32e2e7SValeria Barra } else { 890cb32e2e7SValeria Barra ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20); 891cb32e2e7SValeria Barra CHKERRQ(ierr); 892cb32e2e7SValeria Barra } 893cb32e2e7SValeria Barra } 894cb32e2e7SValeria Barra // Timed solve 895cb32e2e7SValeria Barra ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr); 896cb32e2e7SValeria Barra my_rt_start = MPI_Wtime(); 897cb32e2e7SValeria Barra ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 898cb32e2e7SValeria Barra my_rt = MPI_Wtime() - my_rt_start; 899cb32e2e7SValeria Barra { 900cb32e2e7SValeria Barra KSPType ksptype; 901cb32e2e7SValeria Barra KSPConvergedReason reason; 902cb32e2e7SValeria Barra PetscReal rnorm; 903cb32e2e7SValeria Barra PetscInt its; 904cb32e2e7SValeria Barra ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr); 905cb32e2e7SValeria Barra ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); 906cb32e2e7SValeria Barra ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); 907cb32e2e7SValeria Barra ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); 908cb32e2e7SValeria Barra if (!test_mode || reason < 0 || rnorm > 1e-8) { 909dc7d240cSValeria Barra ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm); 910dc7d240cSValeria Barra CHKERRQ(ierr); 911dc7d240cSValeria Barra ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm); 912dc7d240cSValeria Barra CHKERRQ(ierr); 913cb32e2e7SValeria Barra ierr = PetscPrintf(comm, 914cb32e2e7SValeria Barra " KSP:\n" 915cb32e2e7SValeria Barra " KSP Type : %s\n" 916cb32e2e7SValeria Barra " KSP Convergence : %s\n" 917cb32e2e7SValeria Barra " Total KSP Iterations : %D\n" 918cb32e2e7SValeria Barra " Final rnorm : %e\n", 919cb32e2e7SValeria Barra ksptype, KSPConvergedReasons[reason], its, 920cb32e2e7SValeria Barra (double)rnorm); CHKERRQ(ierr); 921dc7d240cSValeria Barra ierr = PetscPrintf(comm, 922dc7d240cSValeria Barra " Performance:\n" 923dc7d240cSValeria Barra " CG Solve Time : %g (%g) sec\n" 924dc7d240cSValeria Barra " DoFs/Sec in CG : %g (%g) million\n", 925dc7d240cSValeria Barra rt_max, rt_min, 1e-6*gsize*its/rt_max, 926dc7d240cSValeria Barra 1e-6*gsize*its/rt_min); CHKERRQ(ierr); 927cb32e2e7SValeria Barra } 928cb32e2e7SValeria Barra if (benchmark_mode && (!test_mode)) { 929cb32e2e7SValeria Barra ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm); 930cb32e2e7SValeria Barra CHKERRQ(ierr); 931cb32e2e7SValeria Barra ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm); 932cb32e2e7SValeria Barra CHKERRQ(ierr); 933cb32e2e7SValeria Barra ierr = PetscPrintf(comm, 934cb32e2e7SValeria Barra " Performance:\n" 935cb32e2e7SValeria Barra " CG Solve Time : %g (%g) sec\n" 936cb32e2e7SValeria Barra " DoFs/Sec in CG : %g (%g) million\n", 937cb32e2e7SValeria Barra rt_max, rt_min, 1e-6*gsize*its/rt_max, 938cb32e2e7SValeria Barra 1e-6*gsize*its/rt_min); CHKERRQ(ierr); 939cb32e2e7SValeria Barra } 940cb32e2e7SValeria Barra } 941cb32e2e7SValeria Barra 942cb32e2e7SValeria Barra { 943cb32e2e7SValeria Barra PetscReal maxerror; 944c70bd2a0Sjeremylt ierr = ComputeErrorMax(user, operror, X, target, &maxerror); CHKERRQ(ierr); 94582311801Sjeremylt PetscReal tol = 5e-2; 946cb32e2e7SValeria Barra if (!test_mode || maxerror > tol) { 947cb32e2e7SValeria Barra ierr = PetscPrintf(comm, 948cb32e2e7SValeria Barra " Pointwise Error (max) : %e\n", 949cb32e2e7SValeria Barra (double)maxerror); CHKERRQ(ierr); 950cb32e2e7SValeria Barra } 951cb32e2e7SValeria Barra } 952cb32e2e7SValeria Barra 953cb32e2e7SValeria Barra if (write_solution) { 954cb32e2e7SValeria Barra PetscViewer vtkviewersoln; 955cb32e2e7SValeria Barra 956cb32e2e7SValeria Barra ierr = PetscViewerCreate(comm, &vtkviewersoln); CHKERRQ(ierr); 957cb32e2e7SValeria Barra ierr = PetscViewerSetType(vtkviewersoln, PETSCVIEWERVTK); CHKERRQ(ierr); 958cb32e2e7SValeria Barra ierr = PetscViewerFileSetName(vtkviewersoln, "solution.vtk"); CHKERRQ(ierr); 959cb32e2e7SValeria Barra ierr = VecView(X, vtkviewersoln); CHKERRQ(ierr); 960cb32e2e7SValeria Barra ierr = PetscViewerDestroy(&vtkviewersoln); CHKERRQ(ierr); 961cb32e2e7SValeria Barra } 962cb32e2e7SValeria Barra 963cb32e2e7SValeria Barra ierr = VecDestroy(&rhs); CHKERRQ(ierr); 964cb32e2e7SValeria Barra ierr = VecDestroy(&rhsloc); CHKERRQ(ierr); 965cb32e2e7SValeria Barra ierr = VecDestroy(&X); CHKERRQ(ierr); 966cb32e2e7SValeria Barra ierr = VecDestroy(&user->Xloc); CHKERRQ(ierr); 967cb32e2e7SValeria Barra ierr = VecDestroy(&user->Yloc); CHKERRQ(ierr); 968cb32e2e7SValeria Barra ierr = VecScatterDestroy(<og); CHKERRQ(ierr); 969cb32e2e7SValeria Barra ierr = VecScatterDestroy(<og0); CHKERRQ(ierr); 970cb32e2e7SValeria Barra ierr = VecScatterDestroy(>ogD); CHKERRQ(ierr); 971cb32e2e7SValeria Barra ierr = MatDestroy(&mat); CHKERRQ(ierr); 972cb32e2e7SValeria Barra ierr = KSPDestroy(&ksp); CHKERRQ(ierr); 973cb32e2e7SValeria Barra 974cb32e2e7SValeria Barra CeedVectorDestroy(&user->xceed); 975cb32e2e7SValeria Barra CeedVectorDestroy(&user->yceed); 976cb32e2e7SValeria Barra CeedVectorDestroy(&user->qdata); 977cb32e2e7SValeria Barra CeedVectorDestroy(&target); 978c70bd2a0Sjeremylt CeedOperatorDestroy(&opsetupgeo); 979c70bd2a0Sjeremylt CeedOperatorDestroy(&opsetuprhs); 980c70bd2a0Sjeremylt CeedOperatorDestroy(&opapply); 981c70bd2a0Sjeremylt CeedOperatorDestroy(&operror); 982cb32e2e7SValeria Barra CeedElemRestrictionDestroy(&Erestrictu); 983cb32e2e7SValeria Barra CeedElemRestrictionDestroy(&Erestrictx); 984cb32e2e7SValeria Barra CeedElemRestrictionDestroy(&Erestrictui); 985cb32e2e7SValeria Barra CeedElemRestrictionDestroy(&Erestrictqdi); 986c70bd2a0Sjeremylt CeedQFunctionDestroy(&qfsetupgeo); 987c70bd2a0Sjeremylt CeedQFunctionDestroy(&qfsetuprhs); 988c70bd2a0Sjeremylt CeedQFunctionDestroy(&qfapply); 989c70bd2a0Sjeremylt CeedQFunctionDestroy(&qferror); 990cb32e2e7SValeria Barra CeedBasisDestroy(&basisu); 991cb32e2e7SValeria Barra CeedBasisDestroy(&basisx); 992cb32e2e7SValeria Barra CeedDestroy(&ceed); 993cb32e2e7SValeria Barra ierr = PetscFree(user); CHKERRQ(ierr); 994cb32e2e7SValeria Barra return PetscFinalize(); 995cb32e2e7SValeria Barra } 996