189c6efa4Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 289c6efa4Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 389c6efa4Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 489c6efa4Sjeremylt // 589c6efa4Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 689c6efa4Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 789c6efa4Sjeremylt // element discretizations for exascale applications. For more information and 889c6efa4Sjeremylt // source code availability see http://github.com/ceed. 989c6efa4Sjeremylt // 1089c6efa4Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1189c6efa4Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1289c6efa4Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1389c6efa4Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1489c6efa4Sjeremylt // software, applications, hardware, advanced system engineering and early 1589c6efa4Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1689c6efa4Sjeremylt 1789c6efa4Sjeremylt #include <string.h> 1889c6efa4Sjeremylt #include "ceed-opt.h" 1989c6efa4Sjeremylt #include "../ref/ceed-ref.h" 2089c6efa4Sjeremylt 2189c6efa4Sjeremylt static int CeedOperatorDestroy_Opt(CeedOperator op) { 2289c6efa4Sjeremylt int ierr; 2389c6efa4Sjeremylt CeedOperator_Opt *impl; 2489c6efa4Sjeremylt ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 2589c6efa4Sjeremylt 2689c6efa4Sjeremylt for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 2789c6efa4Sjeremylt ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 2889c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 2989c6efa4Sjeremylt } 3089c6efa4Sjeremylt ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 3189c6efa4Sjeremylt ierr = CeedFree(&impl->evecs); CeedChk(ierr); 3289c6efa4Sjeremylt ierr = CeedFree(&impl->edata); CeedChk(ierr); 3389c6efa4Sjeremylt ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 3489c6efa4Sjeremylt 3589c6efa4Sjeremylt for (CeedInt i=0; i<impl->numein; i++) { 3689c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 3789c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 3889c6efa4Sjeremylt } 3989c6efa4Sjeremylt ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 4089c6efa4Sjeremylt ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 4189c6efa4Sjeremylt 4289c6efa4Sjeremylt for (CeedInt i=0; i<impl->numeout; i++) { 4389c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 4489c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 4589c6efa4Sjeremylt } 4689c6efa4Sjeremylt ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 4789c6efa4Sjeremylt ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 4889c6efa4Sjeremylt 4989c6efa4Sjeremylt ierr = CeedFree(&impl); CeedChk(ierr); 5089c6efa4Sjeremylt return 0; 5189c6efa4Sjeremylt } 5289c6efa4Sjeremylt 5389c6efa4Sjeremylt /* 5489c6efa4Sjeremylt Setup infields or outfields 5589c6efa4Sjeremylt */ 5689c6efa4Sjeremylt static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 5789c6efa4Sjeremylt bool inOrOut, const CeedInt blksize, 5889c6efa4Sjeremylt CeedElemRestriction *blkrestr, 5989c6efa4Sjeremylt CeedVector *fullevecs, CeedVector *evecs, 6089c6efa4Sjeremylt CeedVector *qvecs, CeedInt starte, 6189c6efa4Sjeremylt CeedInt numfields, CeedInt Q) { 6289c6efa4Sjeremylt CeedInt dim, ierr, ncomp, P; 6389c6efa4Sjeremylt Ceed ceed; 6489c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 6589c6efa4Sjeremylt CeedBasis basis; 6689c6efa4Sjeremylt CeedElemRestriction r; 6789c6efa4Sjeremylt CeedOperatorField *opfields; 6889c6efa4Sjeremylt CeedQFunctionField *qffields; 6989c6efa4Sjeremylt if (inOrOut) { 7089c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, NULL, &opfields); 7189c6efa4Sjeremylt CeedChk(ierr); 7289c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 7389c6efa4Sjeremylt CeedChk(ierr); 7489c6efa4Sjeremylt } else { 7589c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, &opfields, NULL); 7689c6efa4Sjeremylt CeedChk(ierr); 7789c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 7889c6efa4Sjeremylt CeedChk(ierr); 7989c6efa4Sjeremylt } 8089c6efa4Sjeremylt 8189c6efa4Sjeremylt // Loop over fields 8289c6efa4Sjeremylt for (CeedInt i=0; i<numfields; i++) { 8389c6efa4Sjeremylt CeedEvalMode emode; 8489c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 8589c6efa4Sjeremylt 8689c6efa4Sjeremylt if (emode != CEED_EVAL_WEIGHT) { 8789c6efa4Sjeremylt ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 8889c6efa4Sjeremylt CeedChk(ierr); 8989c6efa4Sjeremylt CeedElemRestriction_Ref *data; 9089c6efa4Sjeremylt ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 9189c6efa4Sjeremylt Ceed ceed; 9289c6efa4Sjeremylt ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 93*8795c945Sjeremylt CeedInt nelem, elemsize, nnodes; 9489c6efa4Sjeremylt ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 9589c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 96*8795c945Sjeremylt ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 9789c6efa4Sjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 9889c6efa4Sjeremylt ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 99*8795c945Sjeremylt blksize, nnodes, ncomp, 10089c6efa4Sjeremylt CEED_MEM_HOST, CEED_COPY_VALUES, 10189c6efa4Sjeremylt data->indices, &blkrestr[i+starte]); 10289c6efa4Sjeremylt CeedChk(ierr); 10389c6efa4Sjeremylt ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 10489c6efa4Sjeremylt &fullevecs[i+starte]); 10589c6efa4Sjeremylt CeedChk(ierr); 10689c6efa4Sjeremylt } 10789c6efa4Sjeremylt 10889c6efa4Sjeremylt switch(emode) { 10989c6efa4Sjeremylt case CEED_EVAL_NONE: 11089c6efa4Sjeremylt ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 11189c6efa4Sjeremylt CeedChk(ierr); 11289c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp*blksize, &evecs[i]); CeedChk(ierr); 11389c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp*blksize, &qvecs[i]); CeedChk(ierr); 11489c6efa4Sjeremylt break; 11589c6efa4Sjeremylt case CEED_EVAL_INTERP: 11689c6efa4Sjeremylt ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 11789c6efa4Sjeremylt CeedChk(ierr); 11889c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(r, &P); 11989c6efa4Sjeremylt CeedChk(ierr); 12089c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, P*ncomp*blksize, &evecs[i]); CeedChk(ierr); 12189c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp*blksize, &qvecs[i]); CeedChk(ierr); 12289c6efa4Sjeremylt break; 12389c6efa4Sjeremylt case CEED_EVAL_GRAD: 12489c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 12589c6efa4Sjeremylt ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 12689c6efa4Sjeremylt CeedChk(ierr); 12789c6efa4Sjeremylt ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 12889c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(r, &P); 12989c6efa4Sjeremylt CeedChk(ierr); 13089c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, P*ncomp*blksize, &evecs[i]); CeedChk(ierr); 13189c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp*dim*blksize, &qvecs[i]); CeedChk(ierr); 13289c6efa4Sjeremylt break; 13389c6efa4Sjeremylt case CEED_EVAL_WEIGHT: // Only on input fields 13489c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 13589c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 13689c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 13789c6efa4Sjeremylt CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr); 13889c6efa4Sjeremylt 13989c6efa4Sjeremylt break; 14089c6efa4Sjeremylt case CEED_EVAL_DIV: 14189c6efa4Sjeremylt break; // Not implimented 14289c6efa4Sjeremylt case CEED_EVAL_CURL: 14389c6efa4Sjeremylt break; // Not implimented 14489c6efa4Sjeremylt } 14589c6efa4Sjeremylt } 14689c6efa4Sjeremylt return 0; 14789c6efa4Sjeremylt } 14889c6efa4Sjeremylt 14989c6efa4Sjeremylt /* 15089c6efa4Sjeremylt CeedOperator needs to connect all the named fields (be they active or passive) 15189c6efa4Sjeremylt to the named inputs and outputs of its CeedQFunction. 15289c6efa4Sjeremylt */ 15389c6efa4Sjeremylt static int CeedOperatorSetup_Opt(CeedOperator op) { 15489c6efa4Sjeremylt int ierr; 15589c6efa4Sjeremylt bool setupdone; 15689c6efa4Sjeremylt ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 15789c6efa4Sjeremylt if (setupdone) return 0; 15889c6efa4Sjeremylt Ceed ceed; 15989c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 16089c6efa4Sjeremylt Ceed_Opt *ceedimpl; 16189c6efa4Sjeremylt ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 16289c6efa4Sjeremylt const CeedInt blksize = ceedimpl->blksize; 16389c6efa4Sjeremylt CeedOperator_Opt *impl; 16489c6efa4Sjeremylt ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 16589c6efa4Sjeremylt CeedQFunction qf; 16689c6efa4Sjeremylt ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 16789c6efa4Sjeremylt CeedInt Q, numinputfields, numoutputfields; 16889c6efa4Sjeremylt ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 16989c6efa4Sjeremylt ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 17089c6efa4Sjeremylt CeedChk(ierr); 17189c6efa4Sjeremylt CeedOperatorField *opinputfields, *opoutputfields; 17289c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 17389c6efa4Sjeremylt CeedChk(ierr); 17489c6efa4Sjeremylt CeedQFunctionField *qfinputfields, *qfoutputfields; 17589c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 17689c6efa4Sjeremylt CeedChk(ierr); 17789c6efa4Sjeremylt 17889c6efa4Sjeremylt // Allocate 17989c6efa4Sjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 18089c6efa4Sjeremylt CeedChk(ierr); 18189c6efa4Sjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 18289c6efa4Sjeremylt CeedChk(ierr); 18389c6efa4Sjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 18489c6efa4Sjeremylt CeedChk(ierr); 18589c6efa4Sjeremylt 18689c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 18789c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 18889c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 18989c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 19089c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 19189c6efa4Sjeremylt 19289c6efa4Sjeremylt impl->numein = numinputfields; impl->numeout = numoutputfields; 19389c6efa4Sjeremylt 19489c6efa4Sjeremylt // Set up infield and outfield pointer arrays 19589c6efa4Sjeremylt // Infields 19689c6efa4Sjeremylt ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr, 19789c6efa4Sjeremylt impl->evecs, impl->evecsin, 19889c6efa4Sjeremylt impl->qvecsin, 0, 19989c6efa4Sjeremylt numinputfields, Q); 20089c6efa4Sjeremylt CeedChk(ierr); 20189c6efa4Sjeremylt // Outfields 20289c6efa4Sjeremylt ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr, 20389c6efa4Sjeremylt impl->evecs, impl->evecsout, 20489c6efa4Sjeremylt impl->qvecsout, numinputfields, 20589c6efa4Sjeremylt numoutputfields, Q); 20689c6efa4Sjeremylt CeedChk(ierr); 20789c6efa4Sjeremylt 20889c6efa4Sjeremylt ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 20989c6efa4Sjeremylt 21089c6efa4Sjeremylt return 0; 21189c6efa4Sjeremylt } 21289c6efa4Sjeremylt 21389c6efa4Sjeremylt static inline int CeedOperatorApply_Opt(CeedOperator op, 21489c6efa4Sjeremylt const CeedInt blksize, CeedVector invec, 21589c6efa4Sjeremylt CeedVector outvec, 21689c6efa4Sjeremylt CeedRequest *request) { 21789c6efa4Sjeremylt int ierr; 21889c6efa4Sjeremylt CeedOperator_Opt *impl; 21989c6efa4Sjeremylt ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 22089c6efa4Sjeremylt CeedInt Q, elemsize, numinputfields, numoutputfields, numelements, ncomp; 22189c6efa4Sjeremylt ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 22289c6efa4Sjeremylt ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 22389c6efa4Sjeremylt CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 22489c6efa4Sjeremylt CeedQFunction qf; 22589c6efa4Sjeremylt ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 22689c6efa4Sjeremylt ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 22789c6efa4Sjeremylt CeedChk(ierr); 22889c6efa4Sjeremylt CeedTransposeMode lmode; 22989c6efa4Sjeremylt CeedOperatorField *opinputfields, *opoutputfields; 23089c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 23189c6efa4Sjeremylt CeedChk(ierr); 23289c6efa4Sjeremylt CeedQFunctionField *qfinputfields, *qfoutputfields; 23389c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 23489c6efa4Sjeremylt CeedChk(ierr); 23589c6efa4Sjeremylt CeedEvalMode emode; 23689c6efa4Sjeremylt CeedVector vec; 23789c6efa4Sjeremylt CeedBasis basis; 23889c6efa4Sjeremylt CeedElemRestriction Erestrict; 23989c6efa4Sjeremylt uint64_t state; 24089c6efa4Sjeremylt 24189c6efa4Sjeremylt // Setup 24289c6efa4Sjeremylt ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 24389c6efa4Sjeremylt 24489c6efa4Sjeremylt // Input Evecs and Restriction 24589c6efa4Sjeremylt for (CeedInt i=0; i<numinputfields; i++) { 24689c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 24789c6efa4Sjeremylt CeedChk(ierr); 24889c6efa4Sjeremylt if (emode == CEED_EVAL_WEIGHT) { // Skip 24989c6efa4Sjeremylt } else { 25089c6efa4Sjeremylt // Get input vector 25189c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 25289c6efa4Sjeremylt if (vec != CEED_VECTOR_ACTIVE) { 25389c6efa4Sjeremylt // Restrict 25489c6efa4Sjeremylt ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 25589c6efa4Sjeremylt if (state != impl->inputstate[i]) { 25689c6efa4Sjeremylt ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 25789c6efa4Sjeremylt CeedChk(ierr); 25889c6efa4Sjeremylt ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 25989c6efa4Sjeremylt lmode, vec, impl->evecs[i], request); 26089c6efa4Sjeremylt CeedChk(ierr); 26189c6efa4Sjeremylt impl->inputstate[i] = state; 26289c6efa4Sjeremylt } 26389c6efa4Sjeremylt } else { 26489c6efa4Sjeremylt // Set Qvec for CEED_EVAL_NONE 26589c6efa4Sjeremylt if (emode == CEED_EVAL_NONE) { 26689c6efa4Sjeremylt ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST, 26789c6efa4Sjeremylt &impl->edata[i]); CeedChk(ierr); 26889c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 26989c6efa4Sjeremylt CEED_USE_POINTER, 27089c6efa4Sjeremylt impl->edata[i]); CeedChk(ierr); 27189c6efa4Sjeremylt ierr = CeedVectorRestoreArray(impl->evecsin[i], 27289c6efa4Sjeremylt &impl->edata[i]); CeedChk(ierr); 27389c6efa4Sjeremylt } 27489c6efa4Sjeremylt } 27589c6efa4Sjeremylt // Get evec 27689c6efa4Sjeremylt ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 27789c6efa4Sjeremylt (const CeedScalar **) &impl->edata[i]); 27889c6efa4Sjeremylt CeedChk(ierr); 27989c6efa4Sjeremylt } 28089c6efa4Sjeremylt } 28189c6efa4Sjeremylt 28289c6efa4Sjeremylt // Output Lvecs, Evecs, and Qvecs 28389c6efa4Sjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 28489c6efa4Sjeremylt // Zero Lvecs 28589c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 28689c6efa4Sjeremylt if (vec == CEED_VECTOR_ACTIVE) { 28789c6efa4Sjeremylt if (!impl->add) { 28889c6efa4Sjeremylt vec = outvec; 28989c6efa4Sjeremylt ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 29089c6efa4Sjeremylt } 29189c6efa4Sjeremylt } else { 29289c6efa4Sjeremylt ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 29389c6efa4Sjeremylt } 29489c6efa4Sjeremylt // Set Qvec if needed 29589c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 29689c6efa4Sjeremylt CeedChk(ierr); 29789c6efa4Sjeremylt if (emode == CEED_EVAL_NONE) { 29889c6efa4Sjeremylt // Set qvec to single block evec 29989c6efa4Sjeremylt ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST, 30089c6efa4Sjeremylt &impl->edata[i + numinputfields]); 30189c6efa4Sjeremylt CeedChk(ierr); 30289c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 30389c6efa4Sjeremylt CEED_USE_POINTER, 30489c6efa4Sjeremylt impl->edata[i + numinputfields]); CeedChk(ierr); 30589c6efa4Sjeremylt ierr = CeedVectorRestoreArray(impl->evecsout[i], 30689c6efa4Sjeremylt &impl->edata[i + numinputfields]); 30789c6efa4Sjeremylt CeedChk(ierr); 30889c6efa4Sjeremylt } 30989c6efa4Sjeremylt } 31089c6efa4Sjeremylt impl->add = false; 31189c6efa4Sjeremylt 31289c6efa4Sjeremylt // Loop through elements 31389c6efa4Sjeremylt for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 31489c6efa4Sjeremylt // Input basis apply if needed 31589c6efa4Sjeremylt for (CeedInt i=0; i<numinputfields; i++) { 31689c6efa4Sjeremylt CeedInt activein = 0; 31789c6efa4Sjeremylt // Get elemsize, emode, ncomp 31889c6efa4Sjeremylt ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 31989c6efa4Sjeremylt CeedChk(ierr); 32089c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 32189c6efa4Sjeremylt CeedChk(ierr); 32289c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 32389c6efa4Sjeremylt CeedChk(ierr); 32489c6efa4Sjeremylt ierr = CeedQFunctionFieldGetNumComponents(qfinputfields[i], &ncomp); 32589c6efa4Sjeremylt CeedChk(ierr); 32689c6efa4Sjeremylt // Restrict block active input 32789c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 32889c6efa4Sjeremylt if (vec == CEED_VECTOR_ACTIVE) { 32989c6efa4Sjeremylt ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 33089c6efa4Sjeremylt CeedChk(ierr); 33189c6efa4Sjeremylt ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize, 33289c6efa4Sjeremylt CEED_NOTRANSPOSE, lmode, invec, 33389c6efa4Sjeremylt impl->evecsin[i], request); 33489c6efa4Sjeremylt CeedChk(ierr); 33589c6efa4Sjeremylt activein = 1; 33689c6efa4Sjeremylt } 33789c6efa4Sjeremylt // Basis action 33889c6efa4Sjeremylt switch(emode) { 33989c6efa4Sjeremylt case CEED_EVAL_NONE: 34089c6efa4Sjeremylt if (!activein) { 34189c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 34289c6efa4Sjeremylt CEED_USE_POINTER, 34389c6efa4Sjeremylt &impl->edata[i][e*Q*ncomp]); CeedChk(ierr); 34489c6efa4Sjeremylt } 34589c6efa4Sjeremylt break; 34689c6efa4Sjeremylt case CEED_EVAL_INTERP: 34789c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 34889c6efa4Sjeremylt CeedChk(ierr); 34989c6efa4Sjeremylt if (!activein) { 35089c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 35189c6efa4Sjeremylt CEED_USE_POINTER, 35289c6efa4Sjeremylt &impl->edata[i][e*elemsize*ncomp]); 35389c6efa4Sjeremylt CeedChk(ierr); 35489c6efa4Sjeremylt } 35589c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 35689c6efa4Sjeremylt CEED_EVAL_INTERP, impl->evecsin[i], 35789c6efa4Sjeremylt impl->qvecsin[i]); CeedChk(ierr); 35889c6efa4Sjeremylt break; 35989c6efa4Sjeremylt case CEED_EVAL_GRAD: 36089c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 36189c6efa4Sjeremylt CeedChk(ierr); 36289c6efa4Sjeremylt if (!activein) { 36389c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 36489c6efa4Sjeremylt CEED_USE_POINTER, 36589c6efa4Sjeremylt &impl->edata[i][e*elemsize*ncomp]); 36689c6efa4Sjeremylt CeedChk(ierr); 36789c6efa4Sjeremylt } 36889c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 36989c6efa4Sjeremylt CEED_EVAL_GRAD, impl->evecsin[i], 37089c6efa4Sjeremylt impl->qvecsin[i]); CeedChk(ierr); 37189c6efa4Sjeremylt break; 37289c6efa4Sjeremylt case CEED_EVAL_WEIGHT: 37389c6efa4Sjeremylt break; // No action 37489c6efa4Sjeremylt case CEED_EVAL_DIV: 37589c6efa4Sjeremylt break; // Not implimented 37689c6efa4Sjeremylt case CEED_EVAL_CURL: 37789c6efa4Sjeremylt break; // Not implimented 37889c6efa4Sjeremylt } 37989c6efa4Sjeremylt } 38089c6efa4Sjeremylt 38189c6efa4Sjeremylt // Q function 38289c6efa4Sjeremylt ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 38389c6efa4Sjeremylt CeedChk(ierr); 38489c6efa4Sjeremylt 38589c6efa4Sjeremylt // Output basis apply and restrict 38689c6efa4Sjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 38789c6efa4Sjeremylt // Get elemsize, emode, ncomp 38889c6efa4Sjeremylt ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 38989c6efa4Sjeremylt CeedChk(ierr); 39089c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 39189c6efa4Sjeremylt CeedChk(ierr); 39289c6efa4Sjeremylt // Basis action 39389c6efa4Sjeremylt switch(emode) { 39489c6efa4Sjeremylt case CEED_EVAL_NONE: 39589c6efa4Sjeremylt break; // No action 39689c6efa4Sjeremylt case CEED_EVAL_INTERP: 39789c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 39889c6efa4Sjeremylt CeedChk(ierr); 39989c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 40089c6efa4Sjeremylt CEED_EVAL_INTERP, impl->qvecsout[i], 40189c6efa4Sjeremylt impl->evecsout[i]); CeedChk(ierr); 40289c6efa4Sjeremylt break; 40389c6efa4Sjeremylt case CEED_EVAL_GRAD: 40489c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 40589c6efa4Sjeremylt CeedChk(ierr); 40689c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 40789c6efa4Sjeremylt CEED_EVAL_GRAD, impl->qvecsout[i], 40889c6efa4Sjeremylt impl->evecsout[i]); CeedChk(ierr); 40989c6efa4Sjeremylt break; 41089c6efa4Sjeremylt case CEED_EVAL_WEIGHT: { 41189c6efa4Sjeremylt Ceed ceed; 41289c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 41389c6efa4Sjeremylt return CeedError(ceed, 1, 41489c6efa4Sjeremylt "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); 41589c6efa4Sjeremylt break; // Should not occur 41689c6efa4Sjeremylt } 41789c6efa4Sjeremylt case CEED_EVAL_DIV: 41889c6efa4Sjeremylt break; // Not implimented 41989c6efa4Sjeremylt case CEED_EVAL_CURL: 42089c6efa4Sjeremylt break; // Not implimented 42189c6efa4Sjeremylt } 42289c6efa4Sjeremylt // Restrict output block 42389c6efa4Sjeremylt // Get output vector 42489c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 42589c6efa4Sjeremylt if (vec == CEED_VECTOR_ACTIVE) 42689c6efa4Sjeremylt vec = outvec; 42789c6efa4Sjeremylt // Restrict 42889c6efa4Sjeremylt ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); 42989c6efa4Sjeremylt CeedChk(ierr); 43089c6efa4Sjeremylt ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein], 43189c6efa4Sjeremylt e/blksize, CEED_TRANSPOSE, 43289c6efa4Sjeremylt lmode, impl->evecsout[i], 43389c6efa4Sjeremylt vec, request); CeedChk(ierr); 43489c6efa4Sjeremylt } 43589c6efa4Sjeremylt } 43689c6efa4Sjeremylt 43789c6efa4Sjeremylt // Restore input arrays 43889c6efa4Sjeremylt for (CeedInt i=0; i<numinputfields; i++) { 43989c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 44089c6efa4Sjeremylt CeedChk(ierr); 44189c6efa4Sjeremylt if (emode == CEED_EVAL_WEIGHT) { // Skip 44289c6efa4Sjeremylt } else { 44389c6efa4Sjeremylt ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 44489c6efa4Sjeremylt (const CeedScalar **) &impl->edata[i]); 44589c6efa4Sjeremylt CeedChk(ierr); 44689c6efa4Sjeremylt } 44789c6efa4Sjeremylt } 44889c6efa4Sjeremylt 44989c6efa4Sjeremylt return 0; 45089c6efa4Sjeremylt } 45189c6efa4Sjeremylt 45289c6efa4Sjeremylt int CeedOperatorApply_Opt_1(CeedOperator op, CeedVector invec, 45389c6efa4Sjeremylt CeedVector outvec, CeedRequest *request) { 45489c6efa4Sjeremylt return CeedOperatorApply_Opt(op, 1, invec, outvec, request); 45589c6efa4Sjeremylt } 45689c6efa4Sjeremylt 45789c6efa4Sjeremylt int CeedOperatorApply_Opt_8(CeedOperator op, CeedVector invec, 45889c6efa4Sjeremylt CeedVector outvec, CeedRequest *request) { 45989c6efa4Sjeremylt return CeedOperatorApply_Opt(op, 8, invec, outvec, request); 46089c6efa4Sjeremylt } 46189c6efa4Sjeremylt 46289c6efa4Sjeremylt int CeedOperatorCreate_Opt(CeedOperator op) { 46389c6efa4Sjeremylt int ierr; 46489c6efa4Sjeremylt Ceed ceed; 46589c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 46689c6efa4Sjeremylt Ceed_Opt *ceedimpl; 46789c6efa4Sjeremylt ierr = CeedGetData(ceed, (void*)&ceedimpl); CeedChk(ierr); 46889c6efa4Sjeremylt CeedInt blksize = ceedimpl->blksize; 46989c6efa4Sjeremylt CeedOperator_Opt *impl; 47089c6efa4Sjeremylt 47189c6efa4Sjeremylt ierr = CeedCalloc(1, &impl); CeedChk(ierr); 47289c6efa4Sjeremylt ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 47389c6efa4Sjeremylt 47489c6efa4Sjeremylt if (blksize == 1) { 47589c6efa4Sjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 47689c6efa4Sjeremylt CeedOperatorApply_Opt_1); CeedChk(ierr); 47789c6efa4Sjeremylt } else if (blksize == 8) { 47889c6efa4Sjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 47989c6efa4Sjeremylt CeedOperatorApply_Opt_8); CeedChk(ierr); 48089c6efa4Sjeremylt } else { 48189c6efa4Sjeremylt return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize); 48289c6efa4Sjeremylt } 48389c6efa4Sjeremylt 48489c6efa4Sjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 48589c6efa4Sjeremylt CeedOperatorDestroy_Opt); CeedChk(ierr); 48689c6efa4Sjeremylt return 0; 48789c6efa4Sjeremylt } 488