1*89c6efa4Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2*89c6efa4Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3*89c6efa4Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 4*89c6efa4Sjeremylt // 5*89c6efa4Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 6*89c6efa4Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 7*89c6efa4Sjeremylt // element discretizations for exascale applications. For more information and 8*89c6efa4Sjeremylt // source code availability see http://github.com/ceed. 9*89c6efa4Sjeremylt // 10*89c6efa4Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11*89c6efa4Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 12*89c6efa4Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 13*89c6efa4Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 14*89c6efa4Sjeremylt // software, applications, hardware, advanced system engineering and early 15*89c6efa4Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 16*89c6efa4Sjeremylt 17*89c6efa4Sjeremylt #include <string.h> 18*89c6efa4Sjeremylt #include "ceed-opt.h" 19*89c6efa4Sjeremylt #include "../ref/ceed-ref.h" 20*89c6efa4Sjeremylt 21*89c6efa4Sjeremylt static int CeedOperatorDestroy_Opt(CeedOperator op) { 22*89c6efa4Sjeremylt int ierr; 23*89c6efa4Sjeremylt CeedOperator_Opt *impl; 24*89c6efa4Sjeremylt ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 25*89c6efa4Sjeremylt 26*89c6efa4Sjeremylt for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 27*89c6efa4Sjeremylt ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 28*89c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 29*89c6efa4Sjeremylt } 30*89c6efa4Sjeremylt ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 31*89c6efa4Sjeremylt ierr = CeedFree(&impl->evecs); CeedChk(ierr); 32*89c6efa4Sjeremylt ierr = CeedFree(&impl->edata); CeedChk(ierr); 33*89c6efa4Sjeremylt ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 34*89c6efa4Sjeremylt 35*89c6efa4Sjeremylt for (CeedInt i=0; i<impl->numein; i++) { 36*89c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 37*89c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 38*89c6efa4Sjeremylt } 39*89c6efa4Sjeremylt ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 40*89c6efa4Sjeremylt ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 41*89c6efa4Sjeremylt 42*89c6efa4Sjeremylt for (CeedInt i=0; i<impl->numeout; i++) { 43*89c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 44*89c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 45*89c6efa4Sjeremylt } 46*89c6efa4Sjeremylt ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 47*89c6efa4Sjeremylt ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 48*89c6efa4Sjeremylt 49*89c6efa4Sjeremylt ierr = CeedFree(&impl); CeedChk(ierr); 50*89c6efa4Sjeremylt return 0; 51*89c6efa4Sjeremylt } 52*89c6efa4Sjeremylt 53*89c6efa4Sjeremylt /* 54*89c6efa4Sjeremylt Setup infields or outfields 55*89c6efa4Sjeremylt */ 56*89c6efa4Sjeremylt static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 57*89c6efa4Sjeremylt bool inOrOut, const CeedInt blksize, 58*89c6efa4Sjeremylt CeedElemRestriction *blkrestr, 59*89c6efa4Sjeremylt CeedVector *fullevecs, CeedVector *evecs, 60*89c6efa4Sjeremylt CeedVector *qvecs, CeedInt starte, 61*89c6efa4Sjeremylt CeedInt numfields, CeedInt Q) { 62*89c6efa4Sjeremylt CeedInt dim, ierr, ncomp, P; 63*89c6efa4Sjeremylt Ceed ceed; 64*89c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 65*89c6efa4Sjeremylt CeedBasis basis; 66*89c6efa4Sjeremylt CeedElemRestriction r; 67*89c6efa4Sjeremylt CeedOperatorField *opfields; 68*89c6efa4Sjeremylt CeedQFunctionField *qffields; 69*89c6efa4Sjeremylt if (inOrOut) { 70*89c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, NULL, &opfields); 71*89c6efa4Sjeremylt CeedChk(ierr); 72*89c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 73*89c6efa4Sjeremylt CeedChk(ierr); 74*89c6efa4Sjeremylt } else { 75*89c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, &opfields, NULL); 76*89c6efa4Sjeremylt CeedChk(ierr); 77*89c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 78*89c6efa4Sjeremylt CeedChk(ierr); 79*89c6efa4Sjeremylt } 80*89c6efa4Sjeremylt 81*89c6efa4Sjeremylt // Loop over fields 82*89c6efa4Sjeremylt for (CeedInt i=0; i<numfields; i++) { 83*89c6efa4Sjeremylt CeedEvalMode emode; 84*89c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 85*89c6efa4Sjeremylt 86*89c6efa4Sjeremylt if (emode != CEED_EVAL_WEIGHT) { 87*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 88*89c6efa4Sjeremylt CeedChk(ierr); 89*89c6efa4Sjeremylt CeedElemRestriction_Ref *data; 90*89c6efa4Sjeremylt ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 91*89c6efa4Sjeremylt Ceed ceed; 92*89c6efa4Sjeremylt ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 93*89c6efa4Sjeremylt CeedInt nelem, elemsize, ndof; 94*89c6efa4Sjeremylt ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 95*89c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 96*89c6efa4Sjeremylt ierr = CeedElemRestrictionGetNumDoF(r, &ndof); CeedChk(ierr); 97*89c6efa4Sjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 98*89c6efa4Sjeremylt ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 99*89c6efa4Sjeremylt blksize, ndof, ncomp, 100*89c6efa4Sjeremylt CEED_MEM_HOST, CEED_COPY_VALUES, 101*89c6efa4Sjeremylt data->indices, &blkrestr[i+starte]); 102*89c6efa4Sjeremylt CeedChk(ierr); 103*89c6efa4Sjeremylt ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 104*89c6efa4Sjeremylt &fullevecs[i+starte]); 105*89c6efa4Sjeremylt CeedChk(ierr); 106*89c6efa4Sjeremylt } 107*89c6efa4Sjeremylt 108*89c6efa4Sjeremylt switch(emode) { 109*89c6efa4Sjeremylt case CEED_EVAL_NONE: 110*89c6efa4Sjeremylt ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 111*89c6efa4Sjeremylt CeedChk(ierr); 112*89c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp*blksize, &evecs[i]); CeedChk(ierr); 113*89c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp*blksize, &qvecs[i]); CeedChk(ierr); 114*89c6efa4Sjeremylt break; 115*89c6efa4Sjeremylt case CEED_EVAL_INTERP: 116*89c6efa4Sjeremylt ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 117*89c6efa4Sjeremylt CeedChk(ierr); 118*89c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(r, &P); 119*89c6efa4Sjeremylt CeedChk(ierr); 120*89c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, P*ncomp*blksize, &evecs[i]); CeedChk(ierr); 121*89c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp*blksize, &qvecs[i]); CeedChk(ierr); 122*89c6efa4Sjeremylt break; 123*89c6efa4Sjeremylt case CEED_EVAL_GRAD: 124*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 125*89c6efa4Sjeremylt ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 126*89c6efa4Sjeremylt CeedChk(ierr); 127*89c6efa4Sjeremylt ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 128*89c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(r, &P); 129*89c6efa4Sjeremylt CeedChk(ierr); 130*89c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, P*ncomp*blksize, &evecs[i]); CeedChk(ierr); 131*89c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp*dim*blksize, &qvecs[i]); CeedChk(ierr); 132*89c6efa4Sjeremylt break; 133*89c6efa4Sjeremylt case CEED_EVAL_WEIGHT: // Only on input fields 134*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 135*89c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 136*89c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 137*89c6efa4Sjeremylt CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr); 138*89c6efa4Sjeremylt 139*89c6efa4Sjeremylt break; 140*89c6efa4Sjeremylt case CEED_EVAL_DIV: 141*89c6efa4Sjeremylt break; // Not implimented 142*89c6efa4Sjeremylt case CEED_EVAL_CURL: 143*89c6efa4Sjeremylt break; // Not implimented 144*89c6efa4Sjeremylt } 145*89c6efa4Sjeremylt } 146*89c6efa4Sjeremylt return 0; 147*89c6efa4Sjeremylt } 148*89c6efa4Sjeremylt 149*89c6efa4Sjeremylt /* 150*89c6efa4Sjeremylt CeedOperator needs to connect all the named fields (be they active or passive) 151*89c6efa4Sjeremylt to the named inputs and outputs of its CeedQFunction. 152*89c6efa4Sjeremylt */ 153*89c6efa4Sjeremylt static int CeedOperatorSetup_Opt(CeedOperator op) { 154*89c6efa4Sjeremylt int ierr; 155*89c6efa4Sjeremylt bool setupdone; 156*89c6efa4Sjeremylt ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 157*89c6efa4Sjeremylt if (setupdone) return 0; 158*89c6efa4Sjeremylt Ceed ceed; 159*89c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 160*89c6efa4Sjeremylt Ceed_Opt *ceedimpl; 161*89c6efa4Sjeremylt ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 162*89c6efa4Sjeremylt const CeedInt blksize = ceedimpl->blksize; 163*89c6efa4Sjeremylt CeedOperator_Opt *impl; 164*89c6efa4Sjeremylt ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 165*89c6efa4Sjeremylt CeedQFunction qf; 166*89c6efa4Sjeremylt ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 167*89c6efa4Sjeremylt CeedInt Q, numinputfields, numoutputfields; 168*89c6efa4Sjeremylt ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 169*89c6efa4Sjeremylt ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 170*89c6efa4Sjeremylt CeedChk(ierr); 171*89c6efa4Sjeremylt CeedOperatorField *opinputfields, *opoutputfields; 172*89c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 173*89c6efa4Sjeremylt CeedChk(ierr); 174*89c6efa4Sjeremylt CeedQFunctionField *qfinputfields, *qfoutputfields; 175*89c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 176*89c6efa4Sjeremylt CeedChk(ierr); 177*89c6efa4Sjeremylt 178*89c6efa4Sjeremylt // Allocate 179*89c6efa4Sjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 180*89c6efa4Sjeremylt CeedChk(ierr); 181*89c6efa4Sjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 182*89c6efa4Sjeremylt CeedChk(ierr); 183*89c6efa4Sjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 184*89c6efa4Sjeremylt CeedChk(ierr); 185*89c6efa4Sjeremylt 186*89c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 187*89c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 188*89c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 189*89c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 190*89c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 191*89c6efa4Sjeremylt 192*89c6efa4Sjeremylt impl->numein = numinputfields; impl->numeout = numoutputfields; 193*89c6efa4Sjeremylt 194*89c6efa4Sjeremylt // Set up infield and outfield pointer arrays 195*89c6efa4Sjeremylt // Infields 196*89c6efa4Sjeremylt ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr, 197*89c6efa4Sjeremylt impl->evecs, impl->evecsin, 198*89c6efa4Sjeremylt impl->qvecsin, 0, 199*89c6efa4Sjeremylt numinputfields, Q); 200*89c6efa4Sjeremylt CeedChk(ierr); 201*89c6efa4Sjeremylt // Outfields 202*89c6efa4Sjeremylt ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr, 203*89c6efa4Sjeremylt impl->evecs, impl->evecsout, 204*89c6efa4Sjeremylt impl->qvecsout, numinputfields, 205*89c6efa4Sjeremylt numoutputfields, Q); 206*89c6efa4Sjeremylt CeedChk(ierr); 207*89c6efa4Sjeremylt 208*89c6efa4Sjeremylt ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 209*89c6efa4Sjeremylt 210*89c6efa4Sjeremylt return 0; 211*89c6efa4Sjeremylt } 212*89c6efa4Sjeremylt 213*89c6efa4Sjeremylt static inline int CeedOperatorApply_Opt(CeedOperator op, 214*89c6efa4Sjeremylt const CeedInt blksize, CeedVector invec, 215*89c6efa4Sjeremylt CeedVector outvec, 216*89c6efa4Sjeremylt CeedRequest *request) { 217*89c6efa4Sjeremylt int ierr; 218*89c6efa4Sjeremylt CeedOperator_Opt *impl; 219*89c6efa4Sjeremylt ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 220*89c6efa4Sjeremylt CeedInt Q, elemsize, numinputfields, numoutputfields, numelements, ncomp; 221*89c6efa4Sjeremylt ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 222*89c6efa4Sjeremylt ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 223*89c6efa4Sjeremylt CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 224*89c6efa4Sjeremylt CeedQFunction qf; 225*89c6efa4Sjeremylt ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 226*89c6efa4Sjeremylt ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 227*89c6efa4Sjeremylt CeedChk(ierr); 228*89c6efa4Sjeremylt CeedTransposeMode lmode; 229*89c6efa4Sjeremylt CeedOperatorField *opinputfields, *opoutputfields; 230*89c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 231*89c6efa4Sjeremylt CeedChk(ierr); 232*89c6efa4Sjeremylt CeedQFunctionField *qfinputfields, *qfoutputfields; 233*89c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 234*89c6efa4Sjeremylt CeedChk(ierr); 235*89c6efa4Sjeremylt CeedEvalMode emode; 236*89c6efa4Sjeremylt CeedVector vec; 237*89c6efa4Sjeremylt CeedBasis basis; 238*89c6efa4Sjeremylt CeedElemRestriction Erestrict; 239*89c6efa4Sjeremylt uint64_t state; 240*89c6efa4Sjeremylt 241*89c6efa4Sjeremylt // Setup 242*89c6efa4Sjeremylt ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 243*89c6efa4Sjeremylt 244*89c6efa4Sjeremylt // Input Evecs and Restriction 245*89c6efa4Sjeremylt for (CeedInt i=0; i<numinputfields; i++) { 246*89c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 247*89c6efa4Sjeremylt CeedChk(ierr); 248*89c6efa4Sjeremylt if (emode == CEED_EVAL_WEIGHT) { // Skip 249*89c6efa4Sjeremylt } else { 250*89c6efa4Sjeremylt // Get input vector 251*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 252*89c6efa4Sjeremylt if (vec != CEED_VECTOR_ACTIVE) { 253*89c6efa4Sjeremylt // Restrict 254*89c6efa4Sjeremylt ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 255*89c6efa4Sjeremylt if (state != impl->inputstate[i]) { 256*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 257*89c6efa4Sjeremylt CeedChk(ierr); 258*89c6efa4Sjeremylt ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 259*89c6efa4Sjeremylt lmode, vec, impl->evecs[i], request); 260*89c6efa4Sjeremylt CeedChk(ierr); 261*89c6efa4Sjeremylt impl->inputstate[i] = state; 262*89c6efa4Sjeremylt } 263*89c6efa4Sjeremylt } else { 264*89c6efa4Sjeremylt // Set Qvec for CEED_EVAL_NONE 265*89c6efa4Sjeremylt if (emode == CEED_EVAL_NONE) { 266*89c6efa4Sjeremylt ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST, 267*89c6efa4Sjeremylt &impl->edata[i]); CeedChk(ierr); 268*89c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 269*89c6efa4Sjeremylt CEED_USE_POINTER, 270*89c6efa4Sjeremylt impl->edata[i]); CeedChk(ierr); 271*89c6efa4Sjeremylt ierr = CeedVectorRestoreArray(impl->evecsin[i], 272*89c6efa4Sjeremylt &impl->edata[i]); CeedChk(ierr); 273*89c6efa4Sjeremylt } 274*89c6efa4Sjeremylt } 275*89c6efa4Sjeremylt // Get evec 276*89c6efa4Sjeremylt ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 277*89c6efa4Sjeremylt (const CeedScalar **) &impl->edata[i]); 278*89c6efa4Sjeremylt CeedChk(ierr); 279*89c6efa4Sjeremylt } 280*89c6efa4Sjeremylt } 281*89c6efa4Sjeremylt 282*89c6efa4Sjeremylt // Output Lvecs, Evecs, and Qvecs 283*89c6efa4Sjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 284*89c6efa4Sjeremylt // Zero Lvecs 285*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 286*89c6efa4Sjeremylt if (vec == CEED_VECTOR_ACTIVE) { 287*89c6efa4Sjeremylt if (!impl->add) { 288*89c6efa4Sjeremylt vec = outvec; 289*89c6efa4Sjeremylt ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 290*89c6efa4Sjeremylt } 291*89c6efa4Sjeremylt } else { 292*89c6efa4Sjeremylt ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 293*89c6efa4Sjeremylt } 294*89c6efa4Sjeremylt // Set Qvec if needed 295*89c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 296*89c6efa4Sjeremylt CeedChk(ierr); 297*89c6efa4Sjeremylt if (emode == CEED_EVAL_NONE) { 298*89c6efa4Sjeremylt // Set qvec to single block evec 299*89c6efa4Sjeremylt ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST, 300*89c6efa4Sjeremylt &impl->edata[i + numinputfields]); 301*89c6efa4Sjeremylt CeedChk(ierr); 302*89c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 303*89c6efa4Sjeremylt CEED_USE_POINTER, 304*89c6efa4Sjeremylt impl->edata[i + numinputfields]); CeedChk(ierr); 305*89c6efa4Sjeremylt ierr = CeedVectorRestoreArray(impl->evecsout[i], 306*89c6efa4Sjeremylt &impl->edata[i + numinputfields]); 307*89c6efa4Sjeremylt CeedChk(ierr); 308*89c6efa4Sjeremylt } 309*89c6efa4Sjeremylt } 310*89c6efa4Sjeremylt impl->add = false; 311*89c6efa4Sjeremylt 312*89c6efa4Sjeremylt // Loop through elements 313*89c6efa4Sjeremylt for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 314*89c6efa4Sjeremylt // Input basis apply if needed 315*89c6efa4Sjeremylt for (CeedInt i=0; i<numinputfields; i++) { 316*89c6efa4Sjeremylt CeedInt activein = 0; 317*89c6efa4Sjeremylt // Get elemsize, emode, ncomp 318*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 319*89c6efa4Sjeremylt CeedChk(ierr); 320*89c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 321*89c6efa4Sjeremylt CeedChk(ierr); 322*89c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 323*89c6efa4Sjeremylt CeedChk(ierr); 324*89c6efa4Sjeremylt ierr = CeedQFunctionFieldGetNumComponents(qfinputfields[i], &ncomp); 325*89c6efa4Sjeremylt CeedChk(ierr); 326*89c6efa4Sjeremylt // Restrict block active input 327*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 328*89c6efa4Sjeremylt if (vec == CEED_VECTOR_ACTIVE) { 329*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 330*89c6efa4Sjeremylt CeedChk(ierr); 331*89c6efa4Sjeremylt ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize, 332*89c6efa4Sjeremylt CEED_NOTRANSPOSE, lmode, invec, 333*89c6efa4Sjeremylt impl->evecsin[i], request); 334*89c6efa4Sjeremylt CeedChk(ierr); 335*89c6efa4Sjeremylt activein = 1; 336*89c6efa4Sjeremylt } 337*89c6efa4Sjeremylt // Basis action 338*89c6efa4Sjeremylt switch(emode) { 339*89c6efa4Sjeremylt case CEED_EVAL_NONE: 340*89c6efa4Sjeremylt if (!activein) { 341*89c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 342*89c6efa4Sjeremylt CEED_USE_POINTER, 343*89c6efa4Sjeremylt &impl->edata[i][e*Q*ncomp]); CeedChk(ierr); 344*89c6efa4Sjeremylt } 345*89c6efa4Sjeremylt break; 346*89c6efa4Sjeremylt case CEED_EVAL_INTERP: 347*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 348*89c6efa4Sjeremylt CeedChk(ierr); 349*89c6efa4Sjeremylt if (!activein) { 350*89c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 351*89c6efa4Sjeremylt CEED_USE_POINTER, 352*89c6efa4Sjeremylt &impl->edata[i][e*elemsize*ncomp]); 353*89c6efa4Sjeremylt CeedChk(ierr); 354*89c6efa4Sjeremylt } 355*89c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 356*89c6efa4Sjeremylt CEED_EVAL_INTERP, impl->evecsin[i], 357*89c6efa4Sjeremylt impl->qvecsin[i]); CeedChk(ierr); 358*89c6efa4Sjeremylt break; 359*89c6efa4Sjeremylt case CEED_EVAL_GRAD: 360*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 361*89c6efa4Sjeremylt CeedChk(ierr); 362*89c6efa4Sjeremylt if (!activein) { 363*89c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 364*89c6efa4Sjeremylt CEED_USE_POINTER, 365*89c6efa4Sjeremylt &impl->edata[i][e*elemsize*ncomp]); 366*89c6efa4Sjeremylt CeedChk(ierr); 367*89c6efa4Sjeremylt } 368*89c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 369*89c6efa4Sjeremylt CEED_EVAL_GRAD, impl->evecsin[i], 370*89c6efa4Sjeremylt impl->qvecsin[i]); CeedChk(ierr); 371*89c6efa4Sjeremylt break; 372*89c6efa4Sjeremylt case CEED_EVAL_WEIGHT: 373*89c6efa4Sjeremylt break; // No action 374*89c6efa4Sjeremylt case CEED_EVAL_DIV: 375*89c6efa4Sjeremylt break; // Not implimented 376*89c6efa4Sjeremylt case CEED_EVAL_CURL: 377*89c6efa4Sjeremylt break; // Not implimented 378*89c6efa4Sjeremylt } 379*89c6efa4Sjeremylt } 380*89c6efa4Sjeremylt 381*89c6efa4Sjeremylt // Q function 382*89c6efa4Sjeremylt ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 383*89c6efa4Sjeremylt CeedChk(ierr); 384*89c6efa4Sjeremylt 385*89c6efa4Sjeremylt // Output basis apply and restrict 386*89c6efa4Sjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 387*89c6efa4Sjeremylt // Get elemsize, emode, ncomp 388*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 389*89c6efa4Sjeremylt CeedChk(ierr); 390*89c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 391*89c6efa4Sjeremylt CeedChk(ierr); 392*89c6efa4Sjeremylt // Basis action 393*89c6efa4Sjeremylt switch(emode) { 394*89c6efa4Sjeremylt case CEED_EVAL_NONE: 395*89c6efa4Sjeremylt break; // No action 396*89c6efa4Sjeremylt case CEED_EVAL_INTERP: 397*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 398*89c6efa4Sjeremylt CeedChk(ierr); 399*89c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 400*89c6efa4Sjeremylt CEED_EVAL_INTERP, impl->qvecsout[i], 401*89c6efa4Sjeremylt impl->evecsout[i]); CeedChk(ierr); 402*89c6efa4Sjeremylt break; 403*89c6efa4Sjeremylt case CEED_EVAL_GRAD: 404*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 405*89c6efa4Sjeremylt CeedChk(ierr); 406*89c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 407*89c6efa4Sjeremylt CEED_EVAL_GRAD, impl->qvecsout[i], 408*89c6efa4Sjeremylt impl->evecsout[i]); CeedChk(ierr); 409*89c6efa4Sjeremylt break; 410*89c6efa4Sjeremylt case CEED_EVAL_WEIGHT: { 411*89c6efa4Sjeremylt Ceed ceed; 412*89c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 413*89c6efa4Sjeremylt return CeedError(ceed, 1, 414*89c6efa4Sjeremylt "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); 415*89c6efa4Sjeremylt break; // Should not occur 416*89c6efa4Sjeremylt } 417*89c6efa4Sjeremylt case CEED_EVAL_DIV: 418*89c6efa4Sjeremylt break; // Not implimented 419*89c6efa4Sjeremylt case CEED_EVAL_CURL: 420*89c6efa4Sjeremylt break; // Not implimented 421*89c6efa4Sjeremylt } 422*89c6efa4Sjeremylt // Restrict output block 423*89c6efa4Sjeremylt // Get output vector 424*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 425*89c6efa4Sjeremylt if (vec == CEED_VECTOR_ACTIVE) 426*89c6efa4Sjeremylt vec = outvec; 427*89c6efa4Sjeremylt // Restrict 428*89c6efa4Sjeremylt ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); 429*89c6efa4Sjeremylt CeedChk(ierr); 430*89c6efa4Sjeremylt ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein], 431*89c6efa4Sjeremylt e/blksize, CEED_TRANSPOSE, 432*89c6efa4Sjeremylt lmode, impl->evecsout[i], 433*89c6efa4Sjeremylt vec, request); CeedChk(ierr); 434*89c6efa4Sjeremylt } 435*89c6efa4Sjeremylt } 436*89c6efa4Sjeremylt 437*89c6efa4Sjeremylt // Restore input arrays 438*89c6efa4Sjeremylt for (CeedInt i=0; i<numinputfields; i++) { 439*89c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 440*89c6efa4Sjeremylt CeedChk(ierr); 441*89c6efa4Sjeremylt if (emode == CEED_EVAL_WEIGHT) { // Skip 442*89c6efa4Sjeremylt } else { 443*89c6efa4Sjeremylt ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 444*89c6efa4Sjeremylt (const CeedScalar **) &impl->edata[i]); 445*89c6efa4Sjeremylt CeedChk(ierr); 446*89c6efa4Sjeremylt } 447*89c6efa4Sjeremylt } 448*89c6efa4Sjeremylt 449*89c6efa4Sjeremylt return 0; 450*89c6efa4Sjeremylt } 451*89c6efa4Sjeremylt 452*89c6efa4Sjeremylt int CeedOperatorApply_Opt_1(CeedOperator op, CeedVector invec, 453*89c6efa4Sjeremylt CeedVector outvec, CeedRequest *request) { 454*89c6efa4Sjeremylt return CeedOperatorApply_Opt(op, 1, invec, outvec, request); 455*89c6efa4Sjeremylt } 456*89c6efa4Sjeremylt 457*89c6efa4Sjeremylt int CeedOperatorApply_Opt_8(CeedOperator op, CeedVector invec, 458*89c6efa4Sjeremylt CeedVector outvec, CeedRequest *request) { 459*89c6efa4Sjeremylt return CeedOperatorApply_Opt(op, 8, invec, outvec, request); 460*89c6efa4Sjeremylt } 461*89c6efa4Sjeremylt 462*89c6efa4Sjeremylt int CeedOperatorCreate_Opt(CeedOperator op) { 463*89c6efa4Sjeremylt int ierr; 464*89c6efa4Sjeremylt Ceed ceed; 465*89c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 466*89c6efa4Sjeremylt Ceed_Opt *ceedimpl; 467*89c6efa4Sjeremylt ierr = CeedGetData(ceed, (void*)&ceedimpl); CeedChk(ierr); 468*89c6efa4Sjeremylt CeedInt blksize = ceedimpl->blksize; 469*89c6efa4Sjeremylt CeedOperator_Opt *impl; 470*89c6efa4Sjeremylt 471*89c6efa4Sjeremylt ierr = CeedCalloc(1, &impl); CeedChk(ierr); 472*89c6efa4Sjeremylt ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 473*89c6efa4Sjeremylt 474*89c6efa4Sjeremylt if (blksize == 1) { 475*89c6efa4Sjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 476*89c6efa4Sjeremylt CeedOperatorApply_Opt_1); CeedChk(ierr); 477*89c6efa4Sjeremylt } else if (blksize == 8) { 478*89c6efa4Sjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 479*89c6efa4Sjeremylt CeedOperatorApply_Opt_8); CeedChk(ierr); 480*89c6efa4Sjeremylt } else { 481*89c6efa4Sjeremylt return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize); 482*89c6efa4Sjeremylt } 483*89c6efa4Sjeremylt 484*89c6efa4Sjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 485*89c6efa4Sjeremylt CeedOperatorDestroy_Opt); CeedChk(ierr); 486*89c6efa4Sjeremylt return 0; 487*89c6efa4Sjeremylt } 488