189c6efa4Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 289c6efa4Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 389c6efa4Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 489c6efa4Sjeremylt // 589c6efa4Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 689c6efa4Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 789c6efa4Sjeremylt // element discretizations for exascale applications. For more information and 889c6efa4Sjeremylt // source code availability see http://github.com/ceed. 989c6efa4Sjeremylt // 1089c6efa4Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1189c6efa4Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1289c6efa4Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1389c6efa4Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1489c6efa4Sjeremylt // software, applications, hardware, advanced system engineering and early 1589c6efa4Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1689c6efa4Sjeremylt 1789c6efa4Sjeremylt #include <string.h> 1889c6efa4Sjeremylt #include "ceed-opt.h" 1989c6efa4Sjeremylt #include "../ref/ceed-ref.h" 2089c6efa4Sjeremylt 2189c6efa4Sjeremylt static int CeedOperatorDestroy_Opt(CeedOperator op) { 2289c6efa4Sjeremylt int ierr; 2389c6efa4Sjeremylt CeedOperator_Opt *impl; 2489c6efa4Sjeremylt ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 2589c6efa4Sjeremylt 2689c6efa4Sjeremylt for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 2789c6efa4Sjeremylt ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 2889c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 2989c6efa4Sjeremylt } 3089c6efa4Sjeremylt ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 3189c6efa4Sjeremylt ierr = CeedFree(&impl->evecs); CeedChk(ierr); 3289c6efa4Sjeremylt ierr = CeedFree(&impl->edata); CeedChk(ierr); 3389c6efa4Sjeremylt ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 3489c6efa4Sjeremylt 3589c6efa4Sjeremylt for (CeedInt i=0; i<impl->numein; i++) { 3689c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 3789c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 3889c6efa4Sjeremylt } 3989c6efa4Sjeremylt ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 4089c6efa4Sjeremylt ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 4189c6efa4Sjeremylt 4289c6efa4Sjeremylt for (CeedInt i=0; i<impl->numeout; i++) { 4389c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 4489c6efa4Sjeremylt ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 4589c6efa4Sjeremylt } 4689c6efa4Sjeremylt ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 4789c6efa4Sjeremylt ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 4889c6efa4Sjeremylt 4989c6efa4Sjeremylt ierr = CeedFree(&impl); CeedChk(ierr); 5089c6efa4Sjeremylt return 0; 5189c6efa4Sjeremylt } 5289c6efa4Sjeremylt 5389c6efa4Sjeremylt /* 5489c6efa4Sjeremylt Setup infields or outfields 5589c6efa4Sjeremylt */ 5689c6efa4Sjeremylt static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 5789c6efa4Sjeremylt bool inOrOut, const CeedInt blksize, 5889c6efa4Sjeremylt CeedElemRestriction *blkrestr, 5989c6efa4Sjeremylt CeedVector *fullevecs, CeedVector *evecs, 6089c6efa4Sjeremylt CeedVector *qvecs, CeedInt starte, 6189c6efa4Sjeremylt CeedInt numfields, CeedInt Q) { 624d537eeaSYohann CeedInt dim, ierr, ncomp, size, P; 6389c6efa4Sjeremylt Ceed ceed; 6489c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 6589c6efa4Sjeremylt CeedBasis basis; 6689c6efa4Sjeremylt CeedElemRestriction r; 6789c6efa4Sjeremylt CeedOperatorField *opfields; 6889c6efa4Sjeremylt CeedQFunctionField *qffields; 6989c6efa4Sjeremylt if (inOrOut) { 7089c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, NULL, &opfields); 7189c6efa4Sjeremylt CeedChk(ierr); 7289c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 7389c6efa4Sjeremylt CeedChk(ierr); 7489c6efa4Sjeremylt } else { 7589c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, &opfields, NULL); 7689c6efa4Sjeremylt CeedChk(ierr); 7789c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 7889c6efa4Sjeremylt CeedChk(ierr); 7989c6efa4Sjeremylt } 8089c6efa4Sjeremylt 8189c6efa4Sjeremylt // Loop over fields 8289c6efa4Sjeremylt for (CeedInt i=0; i<numfields; i++) { 8389c6efa4Sjeremylt CeedEvalMode emode; 8489c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 8589c6efa4Sjeremylt 8689c6efa4Sjeremylt if (emode != CEED_EVAL_WEIGHT) { 8789c6efa4Sjeremylt ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 8889c6efa4Sjeremylt CeedChk(ierr); 8989c6efa4Sjeremylt CeedElemRestriction_Ref *data; 9089c6efa4Sjeremylt ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 9189c6efa4Sjeremylt Ceed ceed; 9289c6efa4Sjeremylt ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 938795c945Sjeremylt CeedInt nelem, elemsize, nnodes; 9489c6efa4Sjeremylt ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 9589c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 968795c945Sjeremylt ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 9789c6efa4Sjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 9889c6efa4Sjeremylt ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 998795c945Sjeremylt blksize, nnodes, ncomp, 10089c6efa4Sjeremylt CEED_MEM_HOST, CEED_COPY_VALUES, 10189c6efa4Sjeremylt data->indices, &blkrestr[i+starte]); 10289c6efa4Sjeremylt CeedChk(ierr); 10389c6efa4Sjeremylt ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 10489c6efa4Sjeremylt &fullevecs[i+starte]); 10589c6efa4Sjeremylt CeedChk(ierr); 10689c6efa4Sjeremylt } 10789c6efa4Sjeremylt 10889c6efa4Sjeremylt switch(emode) { 10989c6efa4Sjeremylt case CEED_EVAL_NONE: 1104d537eeaSYohann ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 1114d537eeaSYohann ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr); 1124d537eeaSYohann ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 11389c6efa4Sjeremylt break; 11489c6efa4Sjeremylt case CEED_EVAL_INTERP: 1154d537eeaSYohann ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 11689c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(r, &P); 11789c6efa4Sjeremylt CeedChk(ierr); 1184d537eeaSYohann ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 1194d537eeaSYohann ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 12089c6efa4Sjeremylt break; 12189c6efa4Sjeremylt case CEED_EVAL_GRAD: 12289c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 1234d537eeaSYohann ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 12489c6efa4Sjeremylt ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 12589c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(r, &P); 12689c6efa4Sjeremylt CeedChk(ierr); 1274d537eeaSYohann ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 1284d537eeaSYohann ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 12989c6efa4Sjeremylt break; 13089c6efa4Sjeremylt case CEED_EVAL_WEIGHT: // Only on input fields 13189c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 13289c6efa4Sjeremylt ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 13389c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 13489c6efa4Sjeremylt CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr); 13589c6efa4Sjeremylt 13689c6efa4Sjeremylt break; 13789c6efa4Sjeremylt case CEED_EVAL_DIV: 13889c6efa4Sjeremylt break; // Not implimented 13989c6efa4Sjeremylt case CEED_EVAL_CURL: 14089c6efa4Sjeremylt break; // Not implimented 14189c6efa4Sjeremylt } 14289c6efa4Sjeremylt } 14389c6efa4Sjeremylt return 0; 14489c6efa4Sjeremylt } 14589c6efa4Sjeremylt 14689c6efa4Sjeremylt /* 14789c6efa4Sjeremylt CeedOperator needs to connect all the named fields (be they active or passive) 14889c6efa4Sjeremylt to the named inputs and outputs of its CeedQFunction. 14989c6efa4Sjeremylt */ 15089c6efa4Sjeremylt static int CeedOperatorSetup_Opt(CeedOperator op) { 15189c6efa4Sjeremylt int ierr; 15289c6efa4Sjeremylt bool setupdone; 15389c6efa4Sjeremylt ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 15489c6efa4Sjeremylt if (setupdone) return 0; 15589c6efa4Sjeremylt Ceed ceed; 15689c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 15789c6efa4Sjeremylt Ceed_Opt *ceedimpl; 15889c6efa4Sjeremylt ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 15989c6efa4Sjeremylt const CeedInt blksize = ceedimpl->blksize; 16089c6efa4Sjeremylt CeedOperator_Opt *impl; 16189c6efa4Sjeremylt ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 16289c6efa4Sjeremylt CeedQFunction qf; 16389c6efa4Sjeremylt ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 16489c6efa4Sjeremylt CeedInt Q, numinputfields, numoutputfields; 16589c6efa4Sjeremylt ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 16689c6efa4Sjeremylt ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 16789c6efa4Sjeremylt CeedChk(ierr); 16889c6efa4Sjeremylt CeedOperatorField *opinputfields, *opoutputfields; 16989c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 17089c6efa4Sjeremylt CeedChk(ierr); 17189c6efa4Sjeremylt CeedQFunctionField *qfinputfields, *qfoutputfields; 17289c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 17389c6efa4Sjeremylt CeedChk(ierr); 17489c6efa4Sjeremylt 17589c6efa4Sjeremylt // Allocate 17689c6efa4Sjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 17789c6efa4Sjeremylt CeedChk(ierr); 17889c6efa4Sjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 17989c6efa4Sjeremylt CeedChk(ierr); 18089c6efa4Sjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 18189c6efa4Sjeremylt CeedChk(ierr); 18289c6efa4Sjeremylt 18389c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 18489c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 18589c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 18689c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 18789c6efa4Sjeremylt ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 18889c6efa4Sjeremylt 18989c6efa4Sjeremylt impl->numein = numinputfields; impl->numeout = numoutputfields; 19089c6efa4Sjeremylt 19189c6efa4Sjeremylt // Set up infield and outfield pointer arrays 19289c6efa4Sjeremylt // Infields 19389c6efa4Sjeremylt ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr, 19489c6efa4Sjeremylt impl->evecs, impl->evecsin, 19589c6efa4Sjeremylt impl->qvecsin, 0, 19689c6efa4Sjeremylt numinputfields, Q); 19789c6efa4Sjeremylt CeedChk(ierr); 19889c6efa4Sjeremylt // Outfields 19989c6efa4Sjeremylt ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr, 20089c6efa4Sjeremylt impl->evecs, impl->evecsout, 20189c6efa4Sjeremylt impl->qvecsout, numinputfields, 20289c6efa4Sjeremylt numoutputfields, Q); 20389c6efa4Sjeremylt CeedChk(ierr); 20489c6efa4Sjeremylt 20589c6efa4Sjeremylt ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 20689c6efa4Sjeremylt 20789c6efa4Sjeremylt return 0; 20889c6efa4Sjeremylt } 20989c6efa4Sjeremylt 21089c6efa4Sjeremylt static inline int CeedOperatorApply_Opt(CeedOperator op, 21189c6efa4Sjeremylt const CeedInt blksize, CeedVector invec, 21289c6efa4Sjeremylt CeedVector outvec, 21389c6efa4Sjeremylt CeedRequest *request) { 21489c6efa4Sjeremylt int ierr; 21589c6efa4Sjeremylt CeedOperator_Opt *impl; 21689c6efa4Sjeremylt ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 2174d537eeaSYohann CeedInt Q, elemsize, numinputfields, numoutputfields, numelements, size, dim; 21889c6efa4Sjeremylt ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 21989c6efa4Sjeremylt ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 22089c6efa4Sjeremylt CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 22189c6efa4Sjeremylt CeedQFunction qf; 22289c6efa4Sjeremylt ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 22389c6efa4Sjeremylt ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 22489c6efa4Sjeremylt CeedChk(ierr); 22589c6efa4Sjeremylt CeedTransposeMode lmode; 22689c6efa4Sjeremylt CeedOperatorField *opinputfields, *opoutputfields; 22789c6efa4Sjeremylt ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 22889c6efa4Sjeremylt CeedChk(ierr); 22989c6efa4Sjeremylt CeedQFunctionField *qfinputfields, *qfoutputfields; 23089c6efa4Sjeremylt ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 23189c6efa4Sjeremylt CeedChk(ierr); 23289c6efa4Sjeremylt CeedEvalMode emode; 23389c6efa4Sjeremylt CeedVector vec; 23489c6efa4Sjeremylt CeedBasis basis; 23589c6efa4Sjeremylt CeedElemRestriction Erestrict; 23689c6efa4Sjeremylt uint64_t state; 23789c6efa4Sjeremylt 23889c6efa4Sjeremylt // Setup 23989c6efa4Sjeremylt ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 24089c6efa4Sjeremylt 24189c6efa4Sjeremylt // Input Evecs and Restriction 24289c6efa4Sjeremylt for (CeedInt i=0; i<numinputfields; i++) { 24389c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 24489c6efa4Sjeremylt CeedChk(ierr); 24589c6efa4Sjeremylt if (emode == CEED_EVAL_WEIGHT) { // Skip 24689c6efa4Sjeremylt } else { 24789c6efa4Sjeremylt // Get input vector 24889c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 24989c6efa4Sjeremylt if (vec != CEED_VECTOR_ACTIVE) { 25089c6efa4Sjeremylt // Restrict 25189c6efa4Sjeremylt ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 25289c6efa4Sjeremylt if (state != impl->inputstate[i]) { 25389c6efa4Sjeremylt ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 25489c6efa4Sjeremylt CeedChk(ierr); 25589c6efa4Sjeremylt ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 25689c6efa4Sjeremylt lmode, vec, impl->evecs[i], request); 25789c6efa4Sjeremylt CeedChk(ierr); 25889c6efa4Sjeremylt impl->inputstate[i] = state; 25989c6efa4Sjeremylt } 26089c6efa4Sjeremylt } else { 26189c6efa4Sjeremylt // Set Qvec for CEED_EVAL_NONE 26289c6efa4Sjeremylt if (emode == CEED_EVAL_NONE) { 26389c6efa4Sjeremylt ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST, 26489c6efa4Sjeremylt &impl->edata[i]); CeedChk(ierr); 26589c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 26689c6efa4Sjeremylt CEED_USE_POINTER, 26789c6efa4Sjeremylt impl->edata[i]); CeedChk(ierr); 26889c6efa4Sjeremylt ierr = CeedVectorRestoreArray(impl->evecsin[i], 26989c6efa4Sjeremylt &impl->edata[i]); CeedChk(ierr); 27089c6efa4Sjeremylt } 27189c6efa4Sjeremylt } 27289c6efa4Sjeremylt // Get evec 27389c6efa4Sjeremylt ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 27489c6efa4Sjeremylt (const CeedScalar **) &impl->edata[i]); 27589c6efa4Sjeremylt CeedChk(ierr); 27689c6efa4Sjeremylt } 27789c6efa4Sjeremylt } 27889c6efa4Sjeremylt 27989c6efa4Sjeremylt // Output Lvecs, Evecs, and Qvecs 28089c6efa4Sjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 28189c6efa4Sjeremylt // Zero Lvecs 28289c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 28389c6efa4Sjeremylt if (vec == CEED_VECTOR_ACTIVE) { 28489c6efa4Sjeremylt if (!impl->add) { 28589c6efa4Sjeremylt vec = outvec; 28689c6efa4Sjeremylt ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 28789c6efa4Sjeremylt } 28889c6efa4Sjeremylt } else { 28989c6efa4Sjeremylt ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 29089c6efa4Sjeremylt } 29189c6efa4Sjeremylt // Set Qvec if needed 29289c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 29389c6efa4Sjeremylt CeedChk(ierr); 29489c6efa4Sjeremylt if (emode == CEED_EVAL_NONE) { 29589c6efa4Sjeremylt // Set qvec to single block evec 29689c6efa4Sjeremylt ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST, 29789c6efa4Sjeremylt &impl->edata[i + numinputfields]); 29889c6efa4Sjeremylt CeedChk(ierr); 29989c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 30089c6efa4Sjeremylt CEED_USE_POINTER, 30189c6efa4Sjeremylt impl->edata[i + numinputfields]); CeedChk(ierr); 30289c6efa4Sjeremylt ierr = CeedVectorRestoreArray(impl->evecsout[i], 30389c6efa4Sjeremylt &impl->edata[i + numinputfields]); 30489c6efa4Sjeremylt CeedChk(ierr); 30589c6efa4Sjeremylt } 30689c6efa4Sjeremylt } 30789c6efa4Sjeremylt impl->add = false; 30889c6efa4Sjeremylt 30989c6efa4Sjeremylt // Loop through elements 31089c6efa4Sjeremylt for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 31189c6efa4Sjeremylt // Input basis apply if needed 31289c6efa4Sjeremylt for (CeedInt i=0; i<numinputfields; i++) { 31389c6efa4Sjeremylt CeedInt activein = 0; 3144d537eeaSYohann // Get elemsize, emode, size 31589c6efa4Sjeremylt ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 31689c6efa4Sjeremylt CeedChk(ierr); 31789c6efa4Sjeremylt ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 31889c6efa4Sjeremylt CeedChk(ierr); 31989c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 32089c6efa4Sjeremylt CeedChk(ierr); 3214d537eeaSYohann ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 32289c6efa4Sjeremylt // Restrict block active input 32389c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 32489c6efa4Sjeremylt if (vec == CEED_VECTOR_ACTIVE) { 32589c6efa4Sjeremylt ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 32689c6efa4Sjeremylt CeedChk(ierr); 32789c6efa4Sjeremylt ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize, 32889c6efa4Sjeremylt CEED_NOTRANSPOSE, lmode, invec, 32989c6efa4Sjeremylt impl->evecsin[i], request); 33089c6efa4Sjeremylt CeedChk(ierr); 33189c6efa4Sjeremylt activein = 1; 33289c6efa4Sjeremylt } 33389c6efa4Sjeremylt // Basis action 33489c6efa4Sjeremylt switch(emode) { 33589c6efa4Sjeremylt case CEED_EVAL_NONE: 33689c6efa4Sjeremylt if (!activein) { 33789c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 33889c6efa4Sjeremylt CEED_USE_POINTER, 3394d537eeaSYohann &impl->edata[i][e*Q*size]); CeedChk(ierr); 34089c6efa4Sjeremylt } 34189c6efa4Sjeremylt break; 34289c6efa4Sjeremylt case CEED_EVAL_INTERP: 34389c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 34489c6efa4Sjeremylt CeedChk(ierr); 34589c6efa4Sjeremylt if (!activein) { 34689c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 34789c6efa4Sjeremylt CEED_USE_POINTER, 3484d537eeaSYohann &impl->edata[i][e*elemsize*size]); 34989c6efa4Sjeremylt CeedChk(ierr); 35089c6efa4Sjeremylt } 35189c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 35289c6efa4Sjeremylt CEED_EVAL_INTERP, impl->evecsin[i], 35389c6efa4Sjeremylt impl->qvecsin[i]); CeedChk(ierr); 35489c6efa4Sjeremylt break; 35589c6efa4Sjeremylt case CEED_EVAL_GRAD: 35689c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 35789c6efa4Sjeremylt CeedChk(ierr); 35889c6efa4Sjeremylt if (!activein) { 3594d537eeaSYohann ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 36089c6efa4Sjeremylt ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 36189c6efa4Sjeremylt CEED_USE_POINTER, 3624d537eeaSYohann &impl->edata[i][e*elemsize*size/dim]); 36389c6efa4Sjeremylt CeedChk(ierr); 36489c6efa4Sjeremylt } 36589c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 36689c6efa4Sjeremylt CEED_EVAL_GRAD, impl->evecsin[i], 36789c6efa4Sjeremylt impl->qvecsin[i]); CeedChk(ierr); 36889c6efa4Sjeremylt break; 36989c6efa4Sjeremylt case CEED_EVAL_WEIGHT: 37089c6efa4Sjeremylt break; // No action 37189c6efa4Sjeremylt case CEED_EVAL_DIV: 37289c6efa4Sjeremylt break; // Not implimented 37389c6efa4Sjeremylt case CEED_EVAL_CURL: 37489c6efa4Sjeremylt break; // Not implimented 37589c6efa4Sjeremylt } 37689c6efa4Sjeremylt } 37789c6efa4Sjeremylt 37889c6efa4Sjeremylt // Q function 37989c6efa4Sjeremylt ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 38089c6efa4Sjeremylt CeedChk(ierr); 38189c6efa4Sjeremylt 38289c6efa4Sjeremylt // Output basis apply and restrict 38389c6efa4Sjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 3844d537eeaSYohann // Get elemsize, emode, size 38589c6efa4Sjeremylt ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 38689c6efa4Sjeremylt CeedChk(ierr); 38789c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 38889c6efa4Sjeremylt CeedChk(ierr); 38989c6efa4Sjeremylt // Basis action 39089c6efa4Sjeremylt switch(emode) { 39189c6efa4Sjeremylt case CEED_EVAL_NONE: 39289c6efa4Sjeremylt break; // No action 39389c6efa4Sjeremylt case CEED_EVAL_INTERP: 39489c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 39589c6efa4Sjeremylt CeedChk(ierr); 39689c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 39789c6efa4Sjeremylt CEED_EVAL_INTERP, impl->qvecsout[i], 39889c6efa4Sjeremylt impl->evecsout[i]); CeedChk(ierr); 39989c6efa4Sjeremylt break; 40089c6efa4Sjeremylt case CEED_EVAL_GRAD: 40189c6efa4Sjeremylt ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 40289c6efa4Sjeremylt CeedChk(ierr); 40389c6efa4Sjeremylt ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 40489c6efa4Sjeremylt CEED_EVAL_GRAD, impl->qvecsout[i], 40589c6efa4Sjeremylt impl->evecsout[i]); CeedChk(ierr); 40689c6efa4Sjeremylt break; 40789c6efa4Sjeremylt case CEED_EVAL_WEIGHT: { 408*c042f62fSJeremy L Thompson // LCOV_EXCL_START 40989c6efa4Sjeremylt Ceed ceed; 41089c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 41189c6efa4Sjeremylt return CeedError(ceed, 1, 41289c6efa4Sjeremylt "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); 413*c042f62fSJeremy L Thompson // LCOV_EXCL_STOP 41489c6efa4Sjeremylt break; // Should not occur 41589c6efa4Sjeremylt } 41689c6efa4Sjeremylt case CEED_EVAL_DIV: 41789c6efa4Sjeremylt break; // Not implimented 41889c6efa4Sjeremylt case CEED_EVAL_CURL: 41989c6efa4Sjeremylt break; // Not implimented 42089c6efa4Sjeremylt } 42189c6efa4Sjeremylt // Restrict output block 42289c6efa4Sjeremylt // Get output vector 42389c6efa4Sjeremylt ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 42489c6efa4Sjeremylt if (vec == CEED_VECTOR_ACTIVE) 42589c6efa4Sjeremylt vec = outvec; 42689c6efa4Sjeremylt // Restrict 42789c6efa4Sjeremylt ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); 42889c6efa4Sjeremylt CeedChk(ierr); 42989c6efa4Sjeremylt ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein], 43089c6efa4Sjeremylt e/blksize, CEED_TRANSPOSE, 43189c6efa4Sjeremylt lmode, impl->evecsout[i], 43289c6efa4Sjeremylt vec, request); CeedChk(ierr); 43389c6efa4Sjeremylt } 43489c6efa4Sjeremylt } 43589c6efa4Sjeremylt 43689c6efa4Sjeremylt // Restore input arrays 43789c6efa4Sjeremylt for (CeedInt i=0; i<numinputfields; i++) { 43889c6efa4Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 43989c6efa4Sjeremylt CeedChk(ierr); 44089c6efa4Sjeremylt if (emode == CEED_EVAL_WEIGHT) { // Skip 44189c6efa4Sjeremylt } else { 44289c6efa4Sjeremylt ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 44389c6efa4Sjeremylt (const CeedScalar **) &impl->edata[i]); 44489c6efa4Sjeremylt CeedChk(ierr); 44589c6efa4Sjeremylt } 44689c6efa4Sjeremylt } 44789c6efa4Sjeremylt 44889c6efa4Sjeremylt return 0; 44989c6efa4Sjeremylt } 45089c6efa4Sjeremylt 45189c6efa4Sjeremylt int CeedOperatorApply_Opt_1(CeedOperator op, CeedVector invec, 45289c6efa4Sjeremylt CeedVector outvec, CeedRequest *request) { 45389c6efa4Sjeremylt return CeedOperatorApply_Opt(op, 1, invec, outvec, request); 45489c6efa4Sjeremylt } 45589c6efa4Sjeremylt 45689c6efa4Sjeremylt int CeedOperatorApply_Opt_8(CeedOperator op, CeedVector invec, 45789c6efa4Sjeremylt CeedVector outvec, CeedRequest *request) { 45889c6efa4Sjeremylt return CeedOperatorApply_Opt(op, 8, invec, outvec, request); 45989c6efa4Sjeremylt } 46089c6efa4Sjeremylt 46189c6efa4Sjeremylt int CeedOperatorCreate_Opt(CeedOperator op) { 46289c6efa4Sjeremylt int ierr; 46389c6efa4Sjeremylt Ceed ceed; 46489c6efa4Sjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 46589c6efa4Sjeremylt Ceed_Opt *ceedimpl; 46689c6efa4Sjeremylt ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 46789c6efa4Sjeremylt CeedInt blksize = ceedimpl->blksize; 46889c6efa4Sjeremylt CeedOperator_Opt *impl; 46989c6efa4Sjeremylt 47089c6efa4Sjeremylt ierr = CeedCalloc(1, &impl); CeedChk(ierr); 47189c6efa4Sjeremylt ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 47289c6efa4Sjeremylt 47389c6efa4Sjeremylt if (blksize == 1) { 47489c6efa4Sjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 47589c6efa4Sjeremylt CeedOperatorApply_Opt_1); CeedChk(ierr); 47689c6efa4Sjeremylt } else if (blksize == 8) { 47789c6efa4Sjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 47889c6efa4Sjeremylt CeedOperatorApply_Opt_8); CeedChk(ierr); 47989c6efa4Sjeremylt } else { 480*c042f62fSJeremy L Thompson // LCOV_EXCL_START 48189c6efa4Sjeremylt return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize); 482*c042f62fSJeremy L Thompson // LCOV_EXCL_STOP 48389c6efa4Sjeremylt } 48489c6efa4Sjeremylt 48589c6efa4Sjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 48689c6efa4Sjeremylt CeedOperatorDestroy_Opt); CeedChk(ierr); 48789c6efa4Sjeremylt return 0; 48889c6efa4Sjeremylt } 489