121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 421617c04Sjeremylt // 521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 721617c04Sjeremylt // element discretizations for exascale applications. For more information and 821617c04Sjeremylt // source code availability see http://github.com/ceed. 921617c04Sjeremylt // 1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early 1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1621617c04Sjeremylt 1721617c04Sjeremylt #include <ceed-impl.h> 1821617c04Sjeremylt #include <string.h> 1921617c04Sjeremylt #include "ceed-ref.h" 2021617c04Sjeremylt 2121617c04Sjeremylt static int CeedOperatorDestroy_Ref(CeedOperator op) { 2221617c04Sjeremylt CeedOperator_Ref *impl = op->data; 2321617c04Sjeremylt int ierr; 2421617c04Sjeremylt 25885ac19cSjeremylt for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 26885ac19cSjeremylt ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 27885ac19cSjeremylt } 28885ac19cSjeremylt ierr = CeedFree(&impl->evecs); CeedChk(ierr); 29885ac19cSjeremylt ierr = CeedFree(&impl->edata); CeedChk(ierr); 30885ac19cSjeremylt 31885ac19cSjeremylt for (CeedInt i=0; i<impl->numqin+impl->numqout; i++) { 32885ac19cSjeremylt ierr = CeedFree(&impl->qdata_alloc[i]); CeedChk(ierr); 33885ac19cSjeremylt } 34885ac19cSjeremylt ierr = CeedFree(&impl->qdata_alloc); CeedChk(ierr); 35885ac19cSjeremylt ierr = CeedFree(&impl->qdata); CeedChk(ierr); 36885ac19cSjeremylt 37885ac19cSjeremylt ierr = CeedFree(&impl->indata); CeedChk(ierr); 38885ac19cSjeremylt ierr = CeedFree(&impl->outdata); CeedChk(ierr); 39885ac19cSjeremylt 4021617c04Sjeremylt ierr = CeedFree(&op->data); CeedChk(ierr); 4121617c04Sjeremylt return 0; 4221617c04Sjeremylt } 4321617c04Sjeremylt 44885ac19cSjeremylt /* 45885ac19cSjeremylt Setup infields or outfields 46885ac19cSjeremylt */ 47885ac19cSjeremylt static int CeedOperatorSetupFields_Ref(struct CeedQFunctionField qfields[16], 48885ac19cSjeremylt struct CeedOperatorField ofields[16], 49885ac19cSjeremylt CeedVector *evecs, CeedScalar **qdata, 50885ac19cSjeremylt CeedScalar **qdata_alloc, CeedScalar **indata, 51135a076eSjeremylt CeedInt starti, CeedInt startq, 52135a076eSjeremylt CeedInt numfields, CeedInt Q) { 53135a076eSjeremylt CeedInt dim, ierr, iq=startq, ncomp; 5421617c04Sjeremylt 55885ac19cSjeremylt // Loop over fields 56885ac19cSjeremylt for (CeedInt i=0; i<numfields; i++) { 57885ac19cSjeremylt CeedEvalMode emode = qfields[i].emode; 58135a076eSjeremylt 59135a076eSjeremylt if (emode != CEED_EVAL_WEIGHT) { 604b8bea3bSJed Brown ierr = CeedElemRestrictionCreateVector(ofields[i].Erestrict, NULL, 614b8bea3bSJed Brown &evecs[i+starti]); 62135a076eSjeremylt CeedChk(ierr); 63135a076eSjeremylt } 64135a076eSjeremylt 65885ac19cSjeremylt switch(emode) { 66885ac19cSjeremylt case CEED_EVAL_NONE: 67885ac19cSjeremylt break; // No action 68885ac19cSjeremylt case CEED_EVAL_INTERP: 69885ac19cSjeremylt ncomp = qfields[i].ncomp; 70885ac19cSjeremylt ierr = CeedMalloc(Q*ncomp, &qdata_alloc[iq]); CeedChk(ierr); 71885ac19cSjeremylt qdata[i + starti] = qdata_alloc[iq]; 72885ac19cSjeremylt iq++; 73885ac19cSjeremylt break; 74885ac19cSjeremylt case CEED_EVAL_GRAD: 75885ac19cSjeremylt ncomp = qfields[i].ncomp; 76885ac19cSjeremylt dim = ofields[i].basis->dim; 77885ac19cSjeremylt ierr = CeedMalloc(Q*ncomp*dim, &qdata_alloc[iq]); CeedChk(ierr); 78885ac19cSjeremylt qdata[i + starti] = qdata_alloc[iq]; 79885ac19cSjeremylt iq++; 80885ac19cSjeremylt break; 81885ac19cSjeremylt case CEED_EVAL_WEIGHT: // Only on input fields 82885ac19cSjeremylt ierr = CeedMalloc(Q, &qdata_alloc[iq]); CeedChk(ierr); 83d3181881Sjeremylt ierr = CeedBasisApply(ofields[iq].basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 84885ac19cSjeremylt NULL, qdata_alloc[iq]); CeedChk(ierr); 85885ac19cSjeremylt qdata[i] = qdata_alloc[iq]; 86885ac19cSjeremylt indata[i] = qdata[i]; 87885ac19cSjeremylt iq++; 88885ac19cSjeremylt break; 89885ac19cSjeremylt case CEED_EVAL_DIV: 90885ac19cSjeremylt break; // Not implimented 91885ac19cSjeremylt case CEED_EVAL_CURL: 92885ac19cSjeremylt break; // Not implimented 9321617c04Sjeremylt } 94885ac19cSjeremylt } 9521617c04Sjeremylt return 0; 9621617c04Sjeremylt } 9721617c04Sjeremylt 98885ac19cSjeremylt /* 99885ac19cSjeremylt CeedOperator needs to connect all the named fields (be they active or passive) 100885ac19cSjeremylt to the named inputs and outputs of its CeedQFunction. 101885ac19cSjeremylt */ 102885ac19cSjeremylt static int CeedOperatorSetup_Ref(CeedOperator op) { 103885ac19cSjeremylt if (op->setupdone) return 0; 104a2b73c81Sjeremylt CeedOperator_Ref *impl = op->data; 105885ac19cSjeremylt CeedQFunction qf = op->qf; 1061a4ead9bSjeremylt CeedInt Q = op->numqpoints, numinputfields, numoutputfields; 10721617c04Sjeremylt int ierr; 10821617c04Sjeremylt 109885ac19cSjeremylt // Count infield and outfield array sizes and evectors 110*a8de75f0Sjeremylt ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 111*a8de75f0Sjeremylt CeedChk(ierr); 1121a4ead9bSjeremylt impl->numein = numinputfields; 1131a4ead9bSjeremylt for (CeedInt i=0; i<numinputfields; i++) { 114885ac19cSjeremylt CeedEvalMode emode = qf->inputfields[i].emode; 1158d94b059Sjeremylt impl->numqin += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD) + 1168d94b059Sjeremylt !!(emode & CEED_EVAL_WEIGHT); 11721617c04Sjeremylt } 1181a4ead9bSjeremylt impl->numeout = numoutputfields; 1191a4ead9bSjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 120885ac19cSjeremylt CeedEvalMode emode = qf->outputfields[i].emode; 121a2b73c81Sjeremylt impl->numqout += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD); 122885ac19cSjeremylt } 123885ac19cSjeremylt 124885ac19cSjeremylt // Allocate 125a2b73c81Sjeremylt ierr = CeedCalloc(impl->numein + impl->numeout, &impl->evecs); CeedChk(ierr); 126a2b73c81Sjeremylt ierr = CeedCalloc(impl->numein + impl->numeout, &impl->edata); 127885ac19cSjeremylt CeedChk(ierr); 128885ac19cSjeremylt 129a2b73c81Sjeremylt ierr = CeedCalloc(impl->numqin + impl->numqout, &impl->qdata_alloc); 130885ac19cSjeremylt CeedChk(ierr); 1311a4ead9bSjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &impl->qdata); 132885ac19cSjeremylt CeedChk(ierr); 133885ac19cSjeremylt 134a2b73c81Sjeremylt ierr = CeedCalloc(16, &impl->indata); CeedChk(ierr); 135a2b73c81Sjeremylt ierr = CeedCalloc(16, &impl->outdata); CeedChk(ierr); 136885ac19cSjeremylt 137885ac19cSjeremylt // Set up infield and outfield pointer arrays 138885ac19cSjeremylt // Infields 139885ac19cSjeremylt ierr = CeedOperatorSetupFields_Ref(qf->inputfields, op->inputfields, 140a2b73c81Sjeremylt impl->evecs, impl->qdata, impl->qdata_alloc, 141a2b73c81Sjeremylt impl->indata, 0, 0, 1421a4ead9bSjeremylt numinputfields, Q); CeedChk(ierr); 143885ac19cSjeremylt 144885ac19cSjeremylt // Outfields 145885ac19cSjeremylt ierr = CeedOperatorSetupFields_Ref(qf->outputfields, op->outputfields, 146a2b73c81Sjeremylt impl->evecs, impl->qdata, impl->qdata_alloc, 1471a4ead9bSjeremylt impl->indata, numinputfields, 1481a4ead9bSjeremylt impl->numqin, numoutputfields, Q); CeedChk(ierr); 149885ac19cSjeremylt 1508d94b059Sjeremylt // Input Qvecs 1511a4ead9bSjeremylt for (CeedInt i=0; i<numinputfields; i++) { 1528d94b059Sjeremylt CeedEvalMode emode = qf->inputfields[i].emode; 1538d94b059Sjeremylt if ((emode != CEED_EVAL_NONE) && (emode != CEED_EVAL_WEIGHT)) 1548d94b059Sjeremylt impl->indata[i] = impl->qdata[i]; 1558d94b059Sjeremylt } 1567ca8db16Sjeremylt // Output Qvecs 1571a4ead9bSjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 1587ca8db16Sjeremylt CeedEvalMode emode = qf->outputfields[i].emode; 1598d94b059Sjeremylt if (emode != CEED_EVAL_NONE) 1601a4ead9bSjeremylt impl->outdata[i] = impl->qdata[i + numinputfields]; 1617ca8db16Sjeremylt } 1627ca8db16Sjeremylt 163885ac19cSjeremylt op->setupdone = 1; 164885ac19cSjeremylt 165885ac19cSjeremylt return 0; 166885ac19cSjeremylt } 167885ac19cSjeremylt 168885ac19cSjeremylt static int CeedOperatorApply_Ref(CeedOperator op, CeedVector invec, 169885ac19cSjeremylt CeedVector outvec, CeedRequest *request) { 170a2b73c81Sjeremylt CeedOperator_Ref *impl = op->data; 1711a4ead9bSjeremylt CeedInt Q = op->numqpoints, elemsize, numinputfields, numoutputfields; 172885ac19cSjeremylt int ierr; 173885ac19cSjeremylt CeedQFunction qf = op->qf; 174885ac19cSjeremylt CeedTransposeMode lmode = CEED_NOTRANSPOSE; 175885ac19cSjeremylt 176885ac19cSjeremylt // Setup 177885ac19cSjeremylt ierr = CeedOperatorSetup_Ref(op); CeedChk(ierr); 178*a8de75f0Sjeremylt ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 179*a8de75f0Sjeremylt CeedChk(ierr); 180885ac19cSjeremylt 181885ac19cSjeremylt // Input Evecs and Restriction 1821a4ead9bSjeremylt for (CeedInt i=0; i<numinputfields; i++) { 183668048e2SJed Brown CeedEvalMode emode = qf->inputfields[i].emode; 184135a076eSjeremylt if (emode == CEED_EVAL_WEIGHT) { // Skip 185668048e2SJed Brown } else { 186668048e2SJed Brown // Active 187668048e2SJed Brown if (op->inputfields[i].vec == CEED_VECTOR_ACTIVE) { 188668048e2SJed Brown // Restrict 189668048e2SJed Brown ierr = CeedElemRestrictionApply(op->inputfields[i].Erestrict, CEED_NOTRANSPOSE, 190a2b73c81Sjeremylt lmode, invec, impl->evecs[i], 191668048e2SJed Brown request); CeedChk(ierr); 192668048e2SJed Brown // Get evec 193a2b73c81Sjeremylt ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 194a2b73c81Sjeremylt (const CeedScalar **) &impl->edata[i]); CeedChk(ierr); 195668048e2SJed Brown } else { 196885ac19cSjeremylt // Passive 1977ca8db16Sjeremylt // Restrict 198885ac19cSjeremylt ierr = CeedElemRestrictionApply(op->inputfields[i].Erestrict, CEED_NOTRANSPOSE, 199a2b73c81Sjeremylt lmode, op->inputfields[i].vec, impl->evecs[i], 200885ac19cSjeremylt request); CeedChk(ierr); 2017ca8db16Sjeremylt // Get evec 202a2b73c81Sjeremylt ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 203a2b73c81Sjeremylt (const CeedScalar **) &impl->edata[i]); CeedChk(ierr); 204885ac19cSjeremylt } 205885ac19cSjeremylt } 206885ac19cSjeremylt } 207885ac19cSjeremylt 208885ac19cSjeremylt // Output Evecs 2091a4ead9bSjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 210a2b73c81Sjeremylt ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 2111a4ead9bSjeremylt &impl->edata[i + numinputfields]); CeedChk(ierr); 212885ac19cSjeremylt } 213885ac19cSjeremylt 214885ac19cSjeremylt // Loop through elements 215885ac19cSjeremylt for (CeedInt e=0; e<op->numelements; e++) { 216885ac19cSjeremylt // Input basis apply if needed 2171a4ead9bSjeremylt for (CeedInt i=0; i<numinputfields; i++) { 218135a076eSjeremylt // Get elemsize, emode, ncomp 219885ac19cSjeremylt elemsize = op->inputfields[i].Erestrict->elemsize; 220885ac19cSjeremylt CeedEvalMode emode = qf->inputfields[i].emode; 221885ac19cSjeremylt CeedInt ncomp = qf->inputfields[i].ncomp; 222885ac19cSjeremylt // Basis action 223885ac19cSjeremylt switch(emode) { 224885ac19cSjeremylt case CEED_EVAL_NONE: 225a2b73c81Sjeremylt impl->indata[i] = &impl->edata[i][e*Q*ncomp]; 226885ac19cSjeremylt break; 227885ac19cSjeremylt case CEED_EVAL_INTERP: 228d3181881Sjeremylt ierr = CeedBasisApply(op->inputfields[i].basis, 1, CEED_NOTRANSPOSE, 229a2b73c81Sjeremylt CEED_EVAL_INTERP, &impl->edata[i][e*elemsize*ncomp], impl->qdata[i]); 230885ac19cSjeremylt CeedChk(ierr); 231885ac19cSjeremylt break; 232885ac19cSjeremylt case CEED_EVAL_GRAD: 233d3181881Sjeremylt ierr = CeedBasisApply(op->inputfields[i].basis, 1, CEED_NOTRANSPOSE, 234a2b73c81Sjeremylt CEED_EVAL_GRAD, &impl->edata[i][e*elemsize*ncomp], impl->qdata[i]); 235885ac19cSjeremylt CeedChk(ierr); 236885ac19cSjeremylt break; 237885ac19cSjeremylt case CEED_EVAL_WEIGHT: 238885ac19cSjeremylt break; // No action 239885ac19cSjeremylt case CEED_EVAL_DIV: 240885ac19cSjeremylt break; // Not implimented 241885ac19cSjeremylt case CEED_EVAL_CURL: 242885ac19cSjeremylt break; // Not implimented 243885ac19cSjeremylt } 244885ac19cSjeremylt } 245885ac19cSjeremylt // Output pointers 2461a4ead9bSjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 247885ac19cSjeremylt CeedEvalMode emode = qf->outputfields[i].emode; 248885ac19cSjeremylt if (emode == CEED_EVAL_NONE) { 249885ac19cSjeremylt CeedInt ncomp = qf->outputfields[i].ncomp; 2501a4ead9bSjeremylt impl->outdata[i] = &impl->edata[i + numinputfields][e*Q*ncomp]; 251885ac19cSjeremylt } 252885ac19cSjeremylt } 253885ac19cSjeremylt // Q function 254a2b73c81Sjeremylt ierr = CeedQFunctionApply(op->qf, Q, (const CeedScalar * const*) impl->indata, 255a2b73c81Sjeremylt impl->outdata); CeedChk(ierr); 256885ac19cSjeremylt 257885ac19cSjeremylt // Output basis apply if needed 2581a4ead9bSjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 259135a076eSjeremylt // Get elemsize, emode, ncomp 260885ac19cSjeremylt elemsize = op->outputfields[i].Erestrict->elemsize; 261885ac19cSjeremylt CeedInt ncomp = qf->outputfields[i].ncomp; 262885ac19cSjeremylt CeedEvalMode emode = qf->outputfields[i].emode; 263885ac19cSjeremylt // Basis action 264885ac19cSjeremylt switch(emode) { 265885ac19cSjeremylt case CEED_EVAL_NONE: 266885ac19cSjeremylt break; // No action 267885ac19cSjeremylt case CEED_EVAL_INTERP: 268d3181881Sjeremylt ierr = CeedBasisApply(op->outputfields[i].basis, 1, CEED_TRANSPOSE, 269a2b73c81Sjeremylt CEED_EVAL_INTERP, impl->outdata[i], 2701a4ead9bSjeremylt &impl->edata[i + numinputfields][e*elemsize*ncomp]); 2718d94b059Sjeremylt CeedChk(ierr); 272885ac19cSjeremylt break; 273885ac19cSjeremylt case CEED_EVAL_GRAD: 2740c7a96bbSjeremylt ierr = CeedBasisApply(op->outputfields[i].basis, 1, CEED_TRANSPOSE, 2750c7a96bbSjeremylt CEED_EVAL_GRAD, 2761a4ead9bSjeremylt impl->outdata[i], &impl->edata[i + numinputfields][e*elemsize*ncomp]); 277885ac19cSjeremylt CeedChk(ierr); 278885ac19cSjeremylt break; 279885ac19cSjeremylt case CEED_EVAL_WEIGHT: 2808d94b059Sjeremylt return CeedError(op->ceed, 1, 2818d94b059Sjeremylt "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); 282885ac19cSjeremylt break; // Should not occur 283885ac19cSjeremylt case CEED_EVAL_DIV: 284885ac19cSjeremylt break; // Not implimented 285885ac19cSjeremylt case CEED_EVAL_CURL: 286885ac19cSjeremylt break; // Not implimented 287885ac19cSjeremylt } 288885ac19cSjeremylt } 289885ac19cSjeremylt } 290885ac19cSjeremylt 29142ecf959Sjeremylt // Zero lvecs 29242ecf959Sjeremylt ierr = CeedVectorSetValue(outvec, 0.0); CeedChk(ierr); 29342ecf959Sjeremylt for (CeedInt i=0; i<qf->numoutputfields; i++) 29442ecf959Sjeremylt if (op->outputfields[i].vec != CEED_VECTOR_ACTIVE) { 29542ecf959Sjeremylt ierr = CeedVectorSetValue(op->outputfields[i].vec, 0.0); CeedChk(ierr); 29642ecf959Sjeremylt } 29742ecf959Sjeremylt 298885ac19cSjeremylt // Output restriction 2991a4ead9bSjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 300a2b73c81Sjeremylt // Restore evec 301a2b73c81Sjeremylt ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 3021a4ead9bSjeremylt &impl->edata[i + numinputfields]); CeedChk(ierr); 303668048e2SJed Brown // Active 304668048e2SJed Brown if (op->outputfields[i].vec == CEED_VECTOR_ACTIVE) { 3057ca8db16Sjeremylt // Restrict 306885ac19cSjeremylt ierr = CeedElemRestrictionApply(op->outputfields[i].Erestrict, CEED_TRANSPOSE, 307a2b73c81Sjeremylt lmode, impl->evecs[i+impl->numein], outvec, request); CeedChk(ierr); 308885ac19cSjeremylt } else { 309885ac19cSjeremylt // Passive 310668048e2SJed Brown // Restrict 311668048e2SJed Brown ierr = CeedElemRestrictionApply(op->outputfields[i].Erestrict, CEED_TRANSPOSE, 312a2b73c81Sjeremylt lmode, impl->evecs[i+impl->numein], op->outputfields[i].vec, 313668048e2SJed Brown request); CeedChk(ierr); 314885ac19cSjeremylt } 315885ac19cSjeremylt } 316885ac19cSjeremylt 3177ca8db16Sjeremylt // Restore input arrays 3181a4ead9bSjeremylt for (CeedInt i=0; i<numinputfields; i++) { 3197ca8db16Sjeremylt CeedEvalMode emode = qf->inputfields[i].emode; 320135a076eSjeremylt if (emode == CEED_EVAL_WEIGHT) { // Skip 3217ca8db16Sjeremylt } else { 322a2b73c81Sjeremylt ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 323a2b73c81Sjeremylt (const CeedScalar **) &impl->edata[i]); CeedChk(ierr); 3247ca8db16Sjeremylt } 3257ca8db16Sjeremylt } 3267ca8db16Sjeremylt 32721617c04Sjeremylt return 0; 32821617c04Sjeremylt } 32921617c04Sjeremylt 33021617c04Sjeremylt int CeedOperatorCreate_Ref(CeedOperator op) { 33121617c04Sjeremylt CeedOperator_Ref *impl; 33221617c04Sjeremylt int ierr; 33321617c04Sjeremylt 33421617c04Sjeremylt ierr = CeedCalloc(1, &impl); CeedChk(ierr); 33521617c04Sjeremylt op->data = impl; 33621617c04Sjeremylt op->Destroy = CeedOperatorDestroy_Ref; 33721617c04Sjeremylt op->Apply = CeedOperatorApply_Ref; 33821617c04Sjeremylt return 0; 33921617c04Sjeremylt } 340