121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 421617c04Sjeremylt // 521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 721617c04Sjeremylt // element discretizations for exascale applications. For more information and 821617c04Sjeremylt // source code availability see http://github.com/ceed. 921617c04Sjeremylt // 1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early 1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1621617c04Sjeremylt 1721617c04Sjeremylt #include <ceed-impl.h> 1821617c04Sjeremylt #include <string.h> 1921617c04Sjeremylt #include "ceed-ref.h" 2021617c04Sjeremylt 2121617c04Sjeremylt static int CeedOperatorDestroy_Ref(CeedOperator op) { 2221617c04Sjeremylt CeedOperator_Ref *impl = op->data; 2321617c04Sjeremylt int ierr; 2421617c04Sjeremylt 25885ac19cSjeremylt for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 26*135a076eSjeremylt if (impl->evecs[i]) { 27885ac19cSjeremylt ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 28885ac19cSjeremylt } 29*135a076eSjeremylt } 30885ac19cSjeremylt ierr = CeedFree(&impl->evecs); CeedChk(ierr); 31885ac19cSjeremylt ierr = CeedFree(&impl->edata); CeedChk(ierr); 32885ac19cSjeremylt 33885ac19cSjeremylt for (CeedInt i=0; i<impl->numqin+impl->numqout; i++) { 34885ac19cSjeremylt ierr = CeedFree(&impl->qdata_alloc[i]); CeedChk(ierr); 35885ac19cSjeremylt } 36885ac19cSjeremylt ierr = CeedFree(&impl->qdata_alloc); CeedChk(ierr); 37885ac19cSjeremylt ierr = CeedFree(&impl->qdata); CeedChk(ierr); 38885ac19cSjeremylt 39885ac19cSjeremylt ierr = CeedFree(&impl->indata); CeedChk(ierr); 40885ac19cSjeremylt ierr = CeedFree(&impl->outdata); CeedChk(ierr); 41885ac19cSjeremylt 4221617c04Sjeremylt ierr = CeedFree(&op->data); CeedChk(ierr); 4321617c04Sjeremylt return 0; 4421617c04Sjeremylt } 4521617c04Sjeremylt 46885ac19cSjeremylt /* 47885ac19cSjeremylt Setup infields or outfields 48885ac19cSjeremylt */ 49885ac19cSjeremylt static int CeedOperatorSetupFields_Ref(struct CeedQFunctionField qfields[16], 50885ac19cSjeremylt struct CeedOperatorField ofields[16], 51885ac19cSjeremylt CeedVector *evecs, CeedScalar **qdata, 52885ac19cSjeremylt CeedScalar **qdata_alloc, CeedScalar **indata, 53*135a076eSjeremylt CeedInt starti, CeedInt startq, 54*135a076eSjeremylt CeedInt numfields, CeedInt Q) { 55*135a076eSjeremylt CeedInt dim, ierr, iq=startq, ncomp; 5621617c04Sjeremylt 57885ac19cSjeremylt // Loop over fields 58885ac19cSjeremylt for (CeedInt i=0; i<numfields; i++) { 59885ac19cSjeremylt CeedEvalMode emode = qfields[i].emode; 60*135a076eSjeremylt 61*135a076eSjeremylt if (emode != CEED_EVAL_WEIGHT) { 62*135a076eSjeremylt ierr = CeedElemRestrictionCreateVector(ofields[i].Erestrict, NULL, &evecs[i+starti]); 63*135a076eSjeremylt CeedChk(ierr); 64*135a076eSjeremylt } 65*135a076eSjeremylt 66885ac19cSjeremylt switch(emode) { 67885ac19cSjeremylt case CEED_EVAL_NONE: 68885ac19cSjeremylt break; // No action 69885ac19cSjeremylt case CEED_EVAL_INTERP: 70885ac19cSjeremylt ncomp = qfields[i].ncomp; 71885ac19cSjeremylt ierr = CeedMalloc(Q*ncomp, &qdata_alloc[iq]); CeedChk(ierr); 72885ac19cSjeremylt qdata[i + starti] = qdata_alloc[iq]; 73885ac19cSjeremylt iq++; 74885ac19cSjeremylt break; 75885ac19cSjeremylt case CEED_EVAL_GRAD: 76885ac19cSjeremylt ncomp = qfields[i].ncomp; 77885ac19cSjeremylt dim = ofields[i].basis->dim; 78885ac19cSjeremylt ierr = CeedMalloc(Q*ncomp*dim, &qdata_alloc[iq]); CeedChk(ierr); 79885ac19cSjeremylt qdata[i + starti] = qdata_alloc[iq]; 80885ac19cSjeremylt iq++; 81885ac19cSjeremylt break; 82885ac19cSjeremylt case CEED_EVAL_WEIGHT: // Only on input fields 83885ac19cSjeremylt ierr = CeedMalloc(Q, &qdata_alloc[iq]); CeedChk(ierr); 84d3181881Sjeremylt ierr = CeedBasisApply(ofields[iq].basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 85885ac19cSjeremylt NULL, qdata_alloc[iq]); CeedChk(ierr); 86885ac19cSjeremylt qdata[i] = qdata_alloc[iq]; 87885ac19cSjeremylt indata[i] = qdata[i]; 88885ac19cSjeremylt iq++; 89885ac19cSjeremylt break; 90885ac19cSjeremylt case CEED_EVAL_DIV: 91885ac19cSjeremylt break; // Not implimented 92885ac19cSjeremylt case CEED_EVAL_CURL: 93885ac19cSjeremylt break; // Not implimented 9421617c04Sjeremylt } 95885ac19cSjeremylt } 9621617c04Sjeremylt return 0; 9721617c04Sjeremylt } 9821617c04Sjeremylt 99885ac19cSjeremylt /* 100885ac19cSjeremylt CeedOperator needs to connect all the named fields (be they active or passive) 101885ac19cSjeremylt to the named inputs and outputs of its CeedQFunction. 102885ac19cSjeremylt */ 103885ac19cSjeremylt static int CeedOperatorSetup_Ref(CeedOperator op) { 104885ac19cSjeremylt if (op->setupdone) return 0; 105885ac19cSjeremylt CeedOperator_Ref *opref = op->data; 106885ac19cSjeremylt CeedQFunction qf = op->qf; 107885ac19cSjeremylt CeedInt Q = op->numqpoints; 10821617c04Sjeremylt int ierr; 10921617c04Sjeremylt 110885ac19cSjeremylt // Count infield and outfield array sizes and evectors 111*135a076eSjeremylt opref->numein = qf->numinputfields; 112885ac19cSjeremylt for (CeedInt i=0; i<qf->numinputfields; i++) { 113885ac19cSjeremylt CeedEvalMode emode = qf->inputfields[i].emode; 114885ac19cSjeremylt opref->numqin += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD) + !! 115885ac19cSjeremylt (emode & CEED_EVAL_WEIGHT); 11621617c04Sjeremylt } 117*135a076eSjeremylt opref->numeout = qf->numoutputfields; 118885ac19cSjeremylt for (CeedInt i=0; i<qf->numoutputfields; i++) { 119885ac19cSjeremylt CeedEvalMode emode = qf->outputfields[i].emode; 120885ac19cSjeremylt opref->numqout += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD); 121885ac19cSjeremylt } 122885ac19cSjeremylt 123885ac19cSjeremylt // Allocate 124885ac19cSjeremylt ierr = CeedCalloc(opref->numein + opref->numeout, &opref->evecs); CeedChk(ierr); 125*135a076eSjeremylt ierr = CeedCalloc(opref->numein + opref->numeout, &opref->edata); 126885ac19cSjeremylt CeedChk(ierr); 127885ac19cSjeremylt 128885ac19cSjeremylt ierr = CeedCalloc(opref->numqin + opref->numqout, &opref->qdata_alloc); 129885ac19cSjeremylt CeedChk(ierr); 130885ac19cSjeremylt ierr = CeedCalloc(qf->numinputfields + qf->numoutputfields, &opref->qdata); 131885ac19cSjeremylt CeedChk(ierr); 132885ac19cSjeremylt 133885ac19cSjeremylt ierr = CeedCalloc(16, &opref->indata); CeedChk(ierr); 134885ac19cSjeremylt ierr = CeedCalloc(16, &opref->outdata); CeedChk(ierr); 135885ac19cSjeremylt 136885ac19cSjeremylt // Set up infield and outfield pointer arrays 137885ac19cSjeremylt // Infields 138885ac19cSjeremylt ierr = CeedOperatorSetupFields_Ref(qf->inputfields, op->inputfields, 139885ac19cSjeremylt opref->evecs, opref->qdata, opref->qdata_alloc, 140*135a076eSjeremylt opref->indata, 0, 0, 141885ac19cSjeremylt qf->numinputfields, Q); CeedChk(ierr); 142885ac19cSjeremylt 143885ac19cSjeremylt // Outfields 144885ac19cSjeremylt ierr = CeedOperatorSetupFields_Ref(qf->outputfields, op->outputfields, 145885ac19cSjeremylt opref->evecs, opref->qdata, opref->qdata_alloc, 146*135a076eSjeremylt opref->indata, qf->numinputfields, 147885ac19cSjeremylt opref->numqin, qf->numoutputfields, Q); CeedChk(ierr); 148885ac19cSjeremylt 1497ca8db16Sjeremylt // Output Qvecs 1507ca8db16Sjeremylt for (CeedInt i=0; i<qf->numoutputfields; i++) { 1517ca8db16Sjeremylt CeedEvalMode emode = qf->outputfields[i].emode; 1527ca8db16Sjeremylt if (emode != CEED_EVAL_NONE) { 1537ca8db16Sjeremylt opref->outdata[i] = opref->qdata[i + qf->numinputfields]; 1547ca8db16Sjeremylt } 1557ca8db16Sjeremylt } 1567ca8db16Sjeremylt 157885ac19cSjeremylt op->setupdone = 1; 158885ac19cSjeremylt 159885ac19cSjeremylt return 0; 160885ac19cSjeremylt } 161885ac19cSjeremylt 162885ac19cSjeremylt static int CeedOperatorApply_Ref(CeedOperator op, CeedVector invec, 163885ac19cSjeremylt CeedVector outvec, CeedRequest *request) { 164885ac19cSjeremylt CeedOperator_Ref *opref = op->data; 165885ac19cSjeremylt CeedInt Q = op->numqpoints, elemsize; 166885ac19cSjeremylt int ierr; 167885ac19cSjeremylt CeedQFunction qf = op->qf; 168885ac19cSjeremylt CeedTransposeMode lmode = CEED_NOTRANSPOSE; 1697ca8db16Sjeremylt CeedScalar *vec_temp; 170885ac19cSjeremylt 171885ac19cSjeremylt // Setup 172885ac19cSjeremylt ierr = CeedOperatorSetup_Ref(op); CeedChk(ierr); 173885ac19cSjeremylt 174885ac19cSjeremylt // Input Evecs and Restriction 175*135a076eSjeremylt for (CeedInt i=0; i<qf->numinputfields; i++) { 176668048e2SJed Brown CeedEvalMode emode = qf->inputfields[i].emode; 177*135a076eSjeremylt if (emode == CEED_EVAL_WEIGHT) { // Skip 178668048e2SJed Brown } else { 1797ca8db16Sjeremylt // Zero evec 180*135a076eSjeremylt ierr = CeedVectorGetArray(opref->evecs[i], CEED_MEM_HOST, &vec_temp); 1810f5de9e9Sjeremylt CeedChk(ierr); 182*135a076eSjeremylt for (CeedInt j=0; j<opref->evecs[i]->length; j++) 1837ca8db16Sjeremylt vec_temp[j] = 0.; 184*135a076eSjeremylt ierr = CeedVectorRestoreArray(opref->evecs[i], &vec_temp); CeedChk(ierr); 185668048e2SJed Brown // Active 186668048e2SJed Brown if (op->inputfields[i].vec == CEED_VECTOR_ACTIVE) { 187668048e2SJed Brown // Restrict 188668048e2SJed Brown ierr = CeedElemRestrictionApply(op->inputfields[i].Erestrict, CEED_NOTRANSPOSE, 189*135a076eSjeremylt lmode, invec, opref->evecs[i], 190668048e2SJed Brown request); CeedChk(ierr); 191668048e2SJed Brown // Get evec 192*135a076eSjeremylt ierr = CeedVectorGetArrayRead(opref->evecs[i], CEED_MEM_HOST, 193668048e2SJed Brown (const CeedScalar **) &opref->edata[i]); CeedChk(ierr); 194668048e2SJed Brown } else { 195885ac19cSjeremylt // Passive 1967ca8db16Sjeremylt // Restrict 197885ac19cSjeremylt ierr = CeedElemRestrictionApply(op->inputfields[i].Erestrict, CEED_NOTRANSPOSE, 198*135a076eSjeremylt lmode, op->inputfields[i].vec, opref->evecs[i], 199885ac19cSjeremylt request); CeedChk(ierr); 2007ca8db16Sjeremylt // Get evec 201*135a076eSjeremylt ierr = CeedVectorGetArrayRead(opref->evecs[i], CEED_MEM_HOST, 202885ac19cSjeremylt (const CeedScalar **) &opref->edata[i]); CeedChk(ierr); 203885ac19cSjeremylt } 204885ac19cSjeremylt } 205885ac19cSjeremylt } 206885ac19cSjeremylt 207885ac19cSjeremylt // Output Evecs 208*135a076eSjeremylt for (CeedInt i=0; i<qf->numoutputfields; i++) { 209*135a076eSjeremylt ierr = CeedVectorGetArray(opref->evecs[i+opref->numein], CEED_MEM_HOST, 210668048e2SJed Brown &opref->edata[i + qf->numinputfields]); CeedChk(ierr); 211885ac19cSjeremylt } 212885ac19cSjeremylt 213885ac19cSjeremylt // Loop through elements 214885ac19cSjeremylt for (CeedInt e=0; e<op->numelements; e++) { 215885ac19cSjeremylt // Input basis apply if needed 216885ac19cSjeremylt for (CeedInt i=0; i<qf->numinputfields; i++) { 217*135a076eSjeremylt // Get elemsize, emode, ncomp 218885ac19cSjeremylt elemsize = op->inputfields[i].Erestrict->elemsize; 219885ac19cSjeremylt CeedEvalMode emode = qf->inputfields[i].emode; 220885ac19cSjeremylt CeedInt ncomp = qf->inputfields[i].ncomp; 221885ac19cSjeremylt // Basis action 222885ac19cSjeremylt switch(emode) { 223885ac19cSjeremylt case CEED_EVAL_NONE: 224885ac19cSjeremylt opref->indata[i] = &opref->edata[i][e*Q*ncomp]; 225885ac19cSjeremylt break; 226885ac19cSjeremylt case CEED_EVAL_INTERP: 227d3181881Sjeremylt ierr = CeedBasisApply(op->inputfields[i].basis, 1, CEED_NOTRANSPOSE, 228885ac19cSjeremylt CEED_EVAL_INTERP, &opref->edata[i][e*elemsize*ncomp], opref->qdata[i]); 229885ac19cSjeremylt CeedChk(ierr); 230885ac19cSjeremylt opref->indata[i] = opref->qdata[i]; 231885ac19cSjeremylt break; 232885ac19cSjeremylt case CEED_EVAL_GRAD: 233d3181881Sjeremylt ierr = CeedBasisApply(op->inputfields[i].basis, 1, CEED_NOTRANSPOSE, 234885ac19cSjeremylt CEED_EVAL_GRAD, &opref->edata[i][e*elemsize*ncomp], opref->qdata[i]); 235885ac19cSjeremylt CeedChk(ierr); 236885ac19cSjeremylt opref->indata[i] = opref->qdata[i]; 237885ac19cSjeremylt break; 238885ac19cSjeremylt case CEED_EVAL_WEIGHT: 239885ac19cSjeremylt break; // No action 240885ac19cSjeremylt case CEED_EVAL_DIV: 241885ac19cSjeremylt break; // Not implimented 242885ac19cSjeremylt case CEED_EVAL_CURL: 243885ac19cSjeremylt break; // Not implimented 244885ac19cSjeremylt } 245885ac19cSjeremylt } 246885ac19cSjeremylt // Output pointers 247885ac19cSjeremylt for (CeedInt i=0; i<qf->numoutputfields; i++) { 248885ac19cSjeremylt CeedEvalMode emode = qf->outputfields[i].emode; 249885ac19cSjeremylt if (emode == CEED_EVAL_NONE) { 250885ac19cSjeremylt CeedInt ncomp = qf->outputfields[i].ncomp; 251885ac19cSjeremylt opref->outdata[i] = &opref->edata[i + qf->numinputfields][e*Q*ncomp]; 252885ac19cSjeremylt } 253885ac19cSjeremylt } 254885ac19cSjeremylt // Q function 255885ac19cSjeremylt ierr = CeedQFunctionApply(op->qf, Q, (const CeedScalar * const*) opref->indata, 256885ac19cSjeremylt opref->outdata); CeedChk(ierr); 257885ac19cSjeremylt 258885ac19cSjeremylt // Output basis apply if needed 259885ac19cSjeremylt for (CeedInt i=0; i<qf->numoutputfields; i++) { 260*135a076eSjeremylt // Get elemsize, emode, ncomp 261885ac19cSjeremylt elemsize = op->outputfields[i].Erestrict->elemsize; 262885ac19cSjeremylt CeedInt ncomp = qf->outputfields[i].ncomp; 263885ac19cSjeremylt CeedEvalMode emode = qf->outputfields[i].emode; 264885ac19cSjeremylt // Basis action 265885ac19cSjeremylt switch(emode) { 266885ac19cSjeremylt case CEED_EVAL_NONE: 267885ac19cSjeremylt break; // No action 268885ac19cSjeremylt case CEED_EVAL_INTERP: 269d3181881Sjeremylt ierr = CeedBasisApply(op->outputfields[i].basis, 1, CEED_TRANSPOSE, 270885ac19cSjeremylt CEED_EVAL_INTERP, opref->outdata[i], 271885ac19cSjeremylt &opref->edata[i + qf->numinputfields][e*elemsize*ncomp]); CeedChk(ierr); 272885ac19cSjeremylt break; 273885ac19cSjeremylt case CEED_EVAL_GRAD: 2740c7a96bbSjeremylt ierr = CeedBasisApply(op->outputfields[i].basis, 1, CEED_TRANSPOSE, 2750c7a96bbSjeremylt CEED_EVAL_GRAD, 276885ac19cSjeremylt opref->outdata[i], &opref->edata[i + qf->numinputfields][e*elemsize*ncomp]); 277885ac19cSjeremylt CeedChk(ierr); 278885ac19cSjeremylt break; 279885ac19cSjeremylt case CEED_EVAL_WEIGHT: 280885ac19cSjeremylt break; // Should not occur 281885ac19cSjeremylt case CEED_EVAL_DIV: 282885ac19cSjeremylt break; // Not implimented 283885ac19cSjeremylt case CEED_EVAL_CURL: 284885ac19cSjeremylt break; // Not implimented 285885ac19cSjeremylt } 286885ac19cSjeremylt } 287885ac19cSjeremylt } 288885ac19cSjeremylt 289885ac19cSjeremylt // Output restriction 290*135a076eSjeremylt for (CeedInt i=0; i<qf->numoutputfields; i++) { 291668048e2SJed Brown // Active 292668048e2SJed Brown if (op->outputfields[i].vec == CEED_VECTOR_ACTIVE) { 2937ca8db16Sjeremylt // Restore evec 294*135a076eSjeremylt ierr = CeedVectorRestoreArray(opref->evecs[i+opref->numein], 295885ac19cSjeremylt &opref->edata[i + qf->numinputfields]); CeedChk(ierr); 2967ca8db16Sjeremylt // Zero lvec 2977ca8db16Sjeremylt ierr = CeedVectorGetArray(outvec, CEED_MEM_HOST, &vec_temp); CeedChk(ierr); 2987ca8db16Sjeremylt for (CeedInt j=0; j<outvec->length; j++) 2997ca8db16Sjeremylt vec_temp[j] = 0.; 3007ca8db16Sjeremylt ierr = CeedVectorRestoreArray(outvec, &vec_temp); CeedChk(ierr); 3017ca8db16Sjeremylt // Restrict 302885ac19cSjeremylt ierr = CeedElemRestrictionApply(op->outputfields[i].Erestrict, CEED_TRANSPOSE, 303*135a076eSjeremylt lmode, opref->evecs[i+opref->numein], outvec, request); CeedChk(ierr); 304885ac19cSjeremylt } else { 305885ac19cSjeremylt // Passive 306668048e2SJed Brown // Restore evec 307*135a076eSjeremylt ierr = CeedVectorRestoreArray(opref->evecs[i+opref->numein], 308885ac19cSjeremylt &opref->edata[i + qf->numinputfields]); CeedChk(ierr); 309668048e2SJed Brown // Zero lvec 310668048e2SJed Brown ierr = CeedVectorGetArray(op->outputfields[i].vec, CEED_MEM_HOST, &vec_temp); 311885ac19cSjeremylt CeedChk(ierr); 312668048e2SJed Brown for (CeedInt j=0; j<op->outputfields[i].vec->length; j++) 313668048e2SJed Brown vec_temp[j] = 0.; 314668048e2SJed Brown ierr = CeedVectorRestoreArray(op->outputfields[i].vec, &vec_temp); 315668048e2SJed Brown CeedChk(ierr); 316668048e2SJed Brown // Restrict 317668048e2SJed Brown ierr = CeedElemRestrictionApply(op->outputfields[i].Erestrict, CEED_TRANSPOSE, 318*135a076eSjeremylt lmode, opref->evecs[i+opref->numein], op->outputfields[i].vec, 319668048e2SJed Brown request); CeedChk(ierr); 320885ac19cSjeremylt } 321885ac19cSjeremylt } 322885ac19cSjeremylt 3237ca8db16Sjeremylt // Restore input arrays 324*135a076eSjeremylt for (CeedInt i=0; i<qf->numinputfields; i++) { 3257ca8db16Sjeremylt CeedEvalMode emode = qf->inputfields[i].emode; 326*135a076eSjeremylt if (emode == CEED_EVAL_WEIGHT) { // Skip 3277ca8db16Sjeremylt } else { 328*135a076eSjeremylt ierr = CeedVectorRestoreArrayRead(opref->evecs[i], 3297ca8db16Sjeremylt (const CeedScalar **) &opref->edata[i]); CeedChk(ierr); 3307ca8db16Sjeremylt } 3317ca8db16Sjeremylt } 3327ca8db16Sjeremylt 33321617c04Sjeremylt return 0; 33421617c04Sjeremylt } 33521617c04Sjeremylt 33621617c04Sjeremylt int CeedOperatorCreate_Ref(CeedOperator op) { 33721617c04Sjeremylt CeedOperator_Ref *impl; 33821617c04Sjeremylt int ierr; 33921617c04Sjeremylt 34021617c04Sjeremylt ierr = CeedCalloc(1, &impl); CeedChk(ierr); 34121617c04Sjeremylt op->data = impl; 34221617c04Sjeremylt op->Destroy = CeedOperatorDestroy_Ref; 34321617c04Sjeremylt op->Apply = CeedOperatorApply_Ref; 34421617c04Sjeremylt return 0; 34521617c04Sjeremylt } 346