1*0d0321e0SJeremy L Thompson // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2*0d0321e0SJeremy L Thompson // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3*0d0321e0SJeremy L Thompson // All Rights reserved. See files LICENSE and NOTICE for details. 4*0d0321e0SJeremy L Thompson // 5*0d0321e0SJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software 6*0d0321e0SJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral 7*0d0321e0SJeremy L Thompson // element discretizations for exascale applications. For more information and 8*0d0321e0SJeremy L Thompson // source code availability see http://github.com/ceed. 9*0d0321e0SJeremy L Thompson // 10*0d0321e0SJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11*0d0321e0SJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office 12*0d0321e0SJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for 13*0d0321e0SJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including 14*0d0321e0SJeremy L Thompson // software, applications, hardware, advanced system engineering and early 15*0d0321e0SJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative. 16*0d0321e0SJeremy L Thompson 17*0d0321e0SJeremy L Thompson #include <ceed/ceed.h> 18*0d0321e0SJeremy L Thompson #include <ceed/backend.h> 19*0d0321e0SJeremy L Thompson #include <hip/hip_runtime.h> 20*0d0321e0SJeremy L Thompson #include <assert.h> 21*0d0321e0SJeremy L Thompson #include <stdbool.h> 22*0d0321e0SJeremy L Thompson #include <string.h> 23*0d0321e0SJeremy L Thompson #include "ceed-hip-ref.h" 24*0d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 25*0d0321e0SJeremy L Thompson 26*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 27*0d0321e0SJeremy L Thompson // Destroy operator 28*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 29*0d0321e0SJeremy L Thompson static int CeedOperatorDestroy_Hip(CeedOperator op) { 30*0d0321e0SJeremy L Thompson int ierr; 31*0d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 32*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 33*0d0321e0SJeremy L Thompson 34*0d0321e0SJeremy L Thompson // Apply data 35*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < impl->numein + impl->numeout; i++) { 36*0d0321e0SJeremy L Thompson ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChkBackend(ierr); 37*0d0321e0SJeremy L Thompson } 38*0d0321e0SJeremy L Thompson ierr = CeedFree(&impl->evecs); CeedChkBackend(ierr); 39*0d0321e0SJeremy L Thompson 40*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < impl->numein; i++) { 41*0d0321e0SJeremy L Thompson ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChkBackend(ierr); 42*0d0321e0SJeremy L Thompson } 43*0d0321e0SJeremy L Thompson ierr = CeedFree(&impl->qvecsin); CeedChkBackend(ierr); 44*0d0321e0SJeremy L Thompson 45*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < impl->numeout; i++) { 46*0d0321e0SJeremy L Thompson ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChkBackend(ierr); 47*0d0321e0SJeremy L Thompson } 48*0d0321e0SJeremy L Thompson ierr = CeedFree(&impl->qvecsout); CeedChkBackend(ierr); 49*0d0321e0SJeremy L Thompson 50*0d0321e0SJeremy L Thompson // QFunction diagonal assembly data 51*0d0321e0SJeremy L Thompson for (CeedInt i=0; i<impl->qfnumactivein; i++) { 52*0d0321e0SJeremy L Thompson ierr = CeedVectorDestroy(&impl->qfactivein[i]); CeedChkBackend(ierr); 53*0d0321e0SJeremy L Thompson } 54*0d0321e0SJeremy L Thompson ierr = CeedFree(&impl->qfactivein); CeedChkBackend(ierr); 55*0d0321e0SJeremy L Thompson 56*0d0321e0SJeremy L Thompson // Diag data 57*0d0321e0SJeremy L Thompson if (impl->diag) { 58*0d0321e0SJeremy L Thompson Ceed ceed; 59*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 60*0d0321e0SJeremy L Thompson CeedChk_Hip(ceed, hipModuleUnload(impl->diag->module)); 61*0d0321e0SJeremy L Thompson ierr = CeedFree(&impl->diag->h_emodein); CeedChkBackend(ierr); 62*0d0321e0SJeremy L Thompson ierr = CeedFree(&impl->diag->h_emodeout); CeedChkBackend(ierr); 63*0d0321e0SJeremy L Thompson ierr = hipFree(impl->diag->d_emodein); CeedChk_Hip(ceed, ierr); 64*0d0321e0SJeremy L Thompson ierr = hipFree(impl->diag->d_emodeout); CeedChk_Hip(ceed, ierr); 65*0d0321e0SJeremy L Thompson ierr = hipFree(impl->diag->d_identity); CeedChk_Hip(ceed, ierr); 66*0d0321e0SJeremy L Thompson ierr = hipFree(impl->diag->d_interpin); CeedChk_Hip(ceed, ierr); 67*0d0321e0SJeremy L Thompson ierr = hipFree(impl->diag->d_interpout); CeedChk_Hip(ceed, ierr); 68*0d0321e0SJeremy L Thompson ierr = hipFree(impl->diag->d_gradin); CeedChk_Hip(ceed, ierr); 69*0d0321e0SJeremy L Thompson ierr = hipFree(impl->diag->d_gradout); CeedChk_Hip(ceed, ierr); 70*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionDestroy(&impl->diag->pbdiagrstr); 71*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 72*0d0321e0SJeremy L Thompson ierr = CeedVectorDestroy(&impl->diag->elemdiag); CeedChkBackend(ierr); 73*0d0321e0SJeremy L Thompson ierr = CeedVectorDestroy(&impl->diag->pbelemdiag); CeedChkBackend(ierr); 74*0d0321e0SJeremy L Thompson } 75*0d0321e0SJeremy L Thompson ierr = CeedFree(&impl->diag); CeedChkBackend(ierr); 76*0d0321e0SJeremy L Thompson 77*0d0321e0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 78*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 79*0d0321e0SJeremy L Thompson } 80*0d0321e0SJeremy L Thompson 81*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 82*0d0321e0SJeremy L Thompson // Setup infields or outfields 83*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 84*0d0321e0SJeremy L Thompson static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, 85*0d0321e0SJeremy L Thompson bool isinput, CeedVector *evecs, 86*0d0321e0SJeremy L Thompson CeedVector *qvecs, CeedInt starte, 87*0d0321e0SJeremy L Thompson CeedInt numfields, CeedInt Q, 88*0d0321e0SJeremy L Thompson CeedInt numelements) { 89*0d0321e0SJeremy L Thompson CeedInt dim, ierr, size; 90*0d0321e0SJeremy L Thompson Ceed ceed; 91*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 92*0d0321e0SJeremy L Thompson CeedBasis basis; 93*0d0321e0SJeremy L Thompson CeedElemRestriction Erestrict; 94*0d0321e0SJeremy L Thompson CeedOperatorField *opfields; 95*0d0321e0SJeremy L Thompson CeedQFunctionField *qffields; 96*0d0321e0SJeremy L Thompson CeedVector fieldvec; 97*0d0321e0SJeremy L Thompson bool strided; 98*0d0321e0SJeremy L Thompson bool skiprestrict; 99*0d0321e0SJeremy L Thompson 100*0d0321e0SJeremy L Thompson if (isinput) { 101*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetFields(op, NULL, &opfields, NULL, NULL); 102*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 103*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, &qffields, NULL, NULL); 104*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 105*0d0321e0SJeremy L Thompson } else { 106*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &opfields); 107*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 108*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qffields); 109*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 110*0d0321e0SJeremy L Thompson } 111*0d0321e0SJeremy L Thompson 112*0d0321e0SJeremy L Thompson // Loop over fields 113*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numfields; i++) { 114*0d0321e0SJeremy L Thompson CeedEvalMode emode; 115*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChkBackend(ierr); 116*0d0321e0SJeremy L Thompson 117*0d0321e0SJeremy L Thompson strided = false; 118*0d0321e0SJeremy L Thompson skiprestrict = false; 119*0d0321e0SJeremy L Thompson if (emode != CEED_EVAL_WEIGHT) { 120*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &Erestrict); 121*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 122*0d0321e0SJeremy L Thompson 123*0d0321e0SJeremy L Thompson // Check whether this field can skip the element restriction: 124*0d0321e0SJeremy L Thompson // must be passive input, with emode NONE, and have a strided restriction with 125*0d0321e0SJeremy L Thompson // CEED_STRIDES_BACKEND. 126*0d0321e0SJeremy L Thompson 127*0d0321e0SJeremy L Thompson // First, check whether the field is input or output: 128*0d0321e0SJeremy L Thompson if (isinput) { 129*0d0321e0SJeremy L Thompson // Check for passive input: 130*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opfields[i], &fieldvec); CeedChkBackend(ierr); 131*0d0321e0SJeremy L Thompson if (fieldvec != CEED_VECTOR_ACTIVE) { 132*0d0321e0SJeremy L Thompson // Check emode 133*0d0321e0SJeremy L Thompson if (emode == CEED_EVAL_NONE) { 134*0d0321e0SJeremy L Thompson // Check for strided restriction 135*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionIsStrided(Erestrict, &strided); 136*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 137*0d0321e0SJeremy L Thompson if (strided) { 138*0d0321e0SJeremy L Thompson // Check if vector is already in preferred backend ordering 139*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionHasBackendStrides(Erestrict, 140*0d0321e0SJeremy L Thompson &skiprestrict); CeedChkBackend(ierr); 141*0d0321e0SJeremy L Thompson } 142*0d0321e0SJeremy L Thompson } 143*0d0321e0SJeremy L Thompson } 144*0d0321e0SJeremy L Thompson } 145*0d0321e0SJeremy L Thompson if (skiprestrict) { 146*0d0321e0SJeremy L Thompson // We do not need an E-Vector, but will use the input field vector's data 147*0d0321e0SJeremy L Thompson // directly in the operator application. 148*0d0321e0SJeremy L Thompson evecs[i + starte] = NULL; 149*0d0321e0SJeremy L Thompson } else { 150*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionCreateVector(Erestrict, NULL, 151*0d0321e0SJeremy L Thompson &evecs[i + starte]); 152*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 153*0d0321e0SJeremy L Thompson } 154*0d0321e0SJeremy L Thompson } 155*0d0321e0SJeremy L Thompson 156*0d0321e0SJeremy L Thompson switch (emode) { 157*0d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 158*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr); 159*0d0321e0SJeremy L Thompson ierr = CeedVectorCreate(ceed, numelements * Q * size, &qvecs[i]); 160*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 161*0d0321e0SJeremy L Thompson break; 162*0d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 163*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr); 164*0d0321e0SJeremy L Thompson ierr = CeedVectorCreate(ceed, numelements * Q * size, &qvecs[i]); 165*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 166*0d0321e0SJeremy L Thompson break; 167*0d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 168*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChkBackend(ierr); 169*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr); 170*0d0321e0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 171*0d0321e0SJeremy L Thompson ierr = CeedVectorCreate(ceed, numelements * Q * size, &qvecs[i]); 172*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 173*0d0321e0SJeremy L Thompson break; 174*0d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: // Only on input fields 175*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChkBackend(ierr); 176*0d0321e0SJeremy L Thompson ierr = CeedVectorCreate(ceed, numelements * Q, &qvecs[i]); CeedChkBackend(ierr); 177*0d0321e0SJeremy L Thompson ierr = CeedBasisApply(basis, numelements, CEED_NOTRANSPOSE, 178*0d0321e0SJeremy L Thompson CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChkBackend(ierr); 179*0d0321e0SJeremy L Thompson break; 180*0d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 181*0d0321e0SJeremy L Thompson break; // TODO: Not implemented 182*0d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 183*0d0321e0SJeremy L Thompson break; // TODO: Not implemented 184*0d0321e0SJeremy L Thompson } 185*0d0321e0SJeremy L Thompson } 186*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 187*0d0321e0SJeremy L Thompson } 188*0d0321e0SJeremy L Thompson 189*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 190*0d0321e0SJeremy L Thompson // CeedOperator needs to connect all the named fields (be they active or passive) 191*0d0321e0SJeremy L Thompson // to the named inputs and outputs of its CeedQFunction. 192*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 193*0d0321e0SJeremy L Thompson static int CeedOperatorSetup_Hip(CeedOperator op) { 194*0d0321e0SJeremy L Thompson int ierr; 195*0d0321e0SJeremy L Thompson bool setupdone; 196*0d0321e0SJeremy L Thompson ierr = CeedOperatorIsSetupDone(op, &setupdone); CeedChkBackend(ierr); 197*0d0321e0SJeremy L Thompson if (setupdone) 198*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 199*0d0321e0SJeremy L Thompson Ceed ceed; 200*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 201*0d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 202*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 203*0d0321e0SJeremy L Thompson CeedQFunction qf; 204*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 205*0d0321e0SJeremy L Thompson CeedInt Q, numelements, numinputfields, numoutputfields; 206*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 207*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetNumElements(op, &numelements); CeedChkBackend(ierr); 208*0d0321e0SJeremy L Thompson CeedOperatorField *opinputfields, *opoutputfields; 209*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetFields(op, &numinputfields, &opinputfields, 210*0d0321e0SJeremy L Thompson &numoutputfields, &opoutputfields); 211*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 212*0d0321e0SJeremy L Thompson CeedQFunctionField *qfinputfields, *qfoutputfields; 213*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, &qfinputfields, NULL, &qfoutputfields); 214*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 215*0d0321e0SJeremy L Thompson 216*0d0321e0SJeremy L Thompson // Allocate 217*0d0321e0SJeremy L Thompson ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 218*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 219*0d0321e0SJeremy L Thompson 220*0d0321e0SJeremy L Thompson ierr = CeedCalloc(CEED_FIELD_MAX, &impl->qvecsin); CeedChkBackend(ierr); 221*0d0321e0SJeremy L Thompson ierr = CeedCalloc(CEED_FIELD_MAX, &impl->qvecsout); CeedChkBackend(ierr); 222*0d0321e0SJeremy L Thompson 223*0d0321e0SJeremy L Thompson impl->numein = numinputfields; impl->numeout = numoutputfields; 224*0d0321e0SJeremy L Thompson 225*0d0321e0SJeremy L Thompson // Set up infield and outfield evecs and qvecs 226*0d0321e0SJeremy L Thompson // Infields 227*0d0321e0SJeremy L Thompson ierr = CeedOperatorSetupFields_Hip(qf, op, true, 228*0d0321e0SJeremy L Thompson impl->evecs, impl->qvecsin, 0, 229*0d0321e0SJeremy L Thompson numinputfields, Q, numelements); 230*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 231*0d0321e0SJeremy L Thompson 232*0d0321e0SJeremy L Thompson // Outfields 233*0d0321e0SJeremy L Thompson ierr = CeedOperatorSetupFields_Hip(qf, op, false, 234*0d0321e0SJeremy L Thompson impl->evecs, impl->qvecsout, 235*0d0321e0SJeremy L Thompson numinputfields, numoutputfields, Q, 236*0d0321e0SJeremy L Thompson numelements); CeedChkBackend(ierr); 237*0d0321e0SJeremy L Thompson 238*0d0321e0SJeremy L Thompson ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 239*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 240*0d0321e0SJeremy L Thompson } 241*0d0321e0SJeremy L Thompson 242*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 243*0d0321e0SJeremy L Thompson // Setup Operator Inputs 244*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 245*0d0321e0SJeremy L Thompson static inline int CeedOperatorSetupInputs_Hip(CeedInt numinputfields, 246*0d0321e0SJeremy L Thompson CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 247*0d0321e0SJeremy L Thompson CeedVector invec, const bool skipactive, CeedScalar *edata[2*CEED_FIELD_MAX], 248*0d0321e0SJeremy L Thompson CeedOperator_Hip *impl, CeedRequest *request) { 249*0d0321e0SJeremy L Thompson CeedInt ierr; 250*0d0321e0SJeremy L Thompson CeedEvalMode emode; 251*0d0321e0SJeremy L Thompson CeedVector vec; 252*0d0321e0SJeremy L Thompson CeedElemRestriction Erestrict; 253*0d0321e0SJeremy L Thompson 254*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numinputfields; i++) { 255*0d0321e0SJeremy L Thompson // Get input vector 256*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 257*0d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 258*0d0321e0SJeremy L Thompson if (skipactive) 259*0d0321e0SJeremy L Thompson continue; 260*0d0321e0SJeremy L Thompson else 261*0d0321e0SJeremy L Thompson vec = invec; 262*0d0321e0SJeremy L Thompson } 263*0d0321e0SJeremy L Thompson 264*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 265*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 266*0d0321e0SJeremy L Thompson if (emode == CEED_EVAL_WEIGHT) { // Skip 267*0d0321e0SJeremy L Thompson } else { 268*0d0321e0SJeremy L Thompson // Get input vector 269*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 270*0d0321e0SJeremy L Thompson // Get input element restriction 271*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 272*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 273*0d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) 274*0d0321e0SJeremy L Thompson vec = invec; 275*0d0321e0SJeremy L Thompson // Restrict, if necessary 276*0d0321e0SJeremy L Thompson if (!impl->evecs[i]) { 277*0d0321e0SJeremy L Thompson // No restriction for this field; read data directly from vec. 278*0d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, 279*0d0321e0SJeremy L Thompson (const CeedScalar **) &edata[i]); 280*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 281*0d0321e0SJeremy L Thompson } else { 282*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionApply(Erestrict, CEED_NOTRANSPOSE, vec, 283*0d0321e0SJeremy L Thompson impl->evecs[i], request); CeedChkBackend(ierr); 284*0d0321e0SJeremy L Thompson // Get evec 285*0d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_DEVICE, 286*0d0321e0SJeremy L Thompson (const CeedScalar **) &edata[i]); 287*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 288*0d0321e0SJeremy L Thompson } 289*0d0321e0SJeremy L Thompson } 290*0d0321e0SJeremy L Thompson } 291*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 292*0d0321e0SJeremy L Thompson } 293*0d0321e0SJeremy L Thompson 294*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 295*0d0321e0SJeremy L Thompson // Input Basis Action 296*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 297*0d0321e0SJeremy L Thompson static inline int CeedOperatorInputBasis_Hip(CeedInt numelements, 298*0d0321e0SJeremy L Thompson CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 299*0d0321e0SJeremy L Thompson CeedInt numinputfields, const bool skipactive, 300*0d0321e0SJeremy L Thompson CeedScalar *edata[2*CEED_FIELD_MAX], CeedOperator_Hip *impl) { 301*0d0321e0SJeremy L Thompson CeedInt ierr; 302*0d0321e0SJeremy L Thompson CeedInt elemsize, size; 303*0d0321e0SJeremy L Thompson CeedElemRestriction Erestrict; 304*0d0321e0SJeremy L Thompson CeedEvalMode emode; 305*0d0321e0SJeremy L Thompson CeedBasis basis; 306*0d0321e0SJeremy L Thompson 307*0d0321e0SJeremy L Thompson for (CeedInt i=0; i<numinputfields; i++) { 308*0d0321e0SJeremy L Thompson // Skip active input 309*0d0321e0SJeremy L Thompson if (skipactive) { 310*0d0321e0SJeremy L Thompson CeedVector vec; 311*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 312*0d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) 313*0d0321e0SJeremy L Thompson continue; 314*0d0321e0SJeremy L Thompson } 315*0d0321e0SJeremy L Thompson // Get elemsize, emode, size 316*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 317*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 318*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 319*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 320*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 321*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 322*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChkBackend(ierr); 323*0d0321e0SJeremy L Thompson // Basis action 324*0d0321e0SJeremy L Thompson switch (emode) { 325*0d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 326*0d0321e0SJeremy L Thompson ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_DEVICE, 327*0d0321e0SJeremy L Thompson CEED_USE_POINTER, edata[i]); CeedChkBackend(ierr); 328*0d0321e0SJeremy L Thompson break; 329*0d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 330*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 331*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 332*0d0321e0SJeremy L Thompson ierr = CeedBasisApply(basis, numelements, CEED_NOTRANSPOSE, 333*0d0321e0SJeremy L Thompson CEED_EVAL_INTERP, impl->evecs[i], 334*0d0321e0SJeremy L Thompson impl->qvecsin[i]); CeedChkBackend(ierr); 335*0d0321e0SJeremy L Thompson break; 336*0d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 337*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 338*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 339*0d0321e0SJeremy L Thompson ierr = CeedBasisApply(basis, numelements, CEED_NOTRANSPOSE, 340*0d0321e0SJeremy L Thompson CEED_EVAL_GRAD, impl->evecs[i], 341*0d0321e0SJeremy L Thompson impl->qvecsin[i]); CeedChkBackend(ierr); 342*0d0321e0SJeremy L Thompson break; 343*0d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: 344*0d0321e0SJeremy L Thompson break; // No action 345*0d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 346*0d0321e0SJeremy L Thompson break; // TODO: Not implemented 347*0d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 348*0d0321e0SJeremy L Thompson break; // TODO: Not implemented 349*0d0321e0SJeremy L Thompson } 350*0d0321e0SJeremy L Thompson } 351*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 352*0d0321e0SJeremy L Thompson } 353*0d0321e0SJeremy L Thompson 354*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 355*0d0321e0SJeremy L Thompson // Restore Input Vectors 356*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 357*0d0321e0SJeremy L Thompson static inline int CeedOperatorRestoreInputs_Hip(CeedInt numinputfields, 358*0d0321e0SJeremy L Thompson CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 359*0d0321e0SJeremy L Thompson const bool skipactive, CeedScalar *edata[2*CEED_FIELD_MAX], 360*0d0321e0SJeremy L Thompson CeedOperator_Hip *impl) { 361*0d0321e0SJeremy L Thompson CeedInt ierr; 362*0d0321e0SJeremy L Thompson CeedEvalMode emode; 363*0d0321e0SJeremy L Thompson CeedVector vec; 364*0d0321e0SJeremy L Thompson 365*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numinputfields; i++) { 366*0d0321e0SJeremy L Thompson // Skip active input 367*0d0321e0SJeremy L Thompson if (skipactive) { 368*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 369*0d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) 370*0d0321e0SJeremy L Thompson continue; 371*0d0321e0SJeremy L Thompson } 372*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 373*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 374*0d0321e0SJeremy L Thompson if (emode == CEED_EVAL_WEIGHT) { // Skip 375*0d0321e0SJeremy L Thompson } else { 376*0d0321e0SJeremy L Thompson if (!impl->evecs[i]) { // This was a skiprestrict case 377*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 378*0d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(vec, 379*0d0321e0SJeremy L Thompson (const CeedScalar **)&edata[i]); 380*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 381*0d0321e0SJeremy L Thompson } else { 382*0d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 383*0d0321e0SJeremy L Thompson (const CeedScalar **) &edata[i]); 384*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 385*0d0321e0SJeremy L Thompson } 386*0d0321e0SJeremy L Thompson } 387*0d0321e0SJeremy L Thompson } 388*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 389*0d0321e0SJeremy L Thompson } 390*0d0321e0SJeremy L Thompson 391*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 392*0d0321e0SJeremy L Thompson // Apply and add to output 393*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 394*0d0321e0SJeremy L Thompson static int CeedOperatorApplyAdd_Hip(CeedOperator op, CeedVector invec, 395*0d0321e0SJeremy L Thompson CeedVector outvec, CeedRequest *request) { 396*0d0321e0SJeremy L Thompson int ierr; 397*0d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 398*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 399*0d0321e0SJeremy L Thompson CeedQFunction qf; 400*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 401*0d0321e0SJeremy L Thompson CeedInt Q, numelements, elemsize, numinputfields, numoutputfields, size; 402*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 403*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetNumElements(op, &numelements); CeedChkBackend(ierr); 404*0d0321e0SJeremy L Thompson CeedOperatorField *opinputfields, *opoutputfields; 405*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetFields(op, &numinputfields, &opinputfields, 406*0d0321e0SJeremy L Thompson &numoutputfields, &opoutputfields); 407*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 408*0d0321e0SJeremy L Thompson CeedQFunctionField *qfinputfields, *qfoutputfields; 409*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, &qfinputfields, NULL, &qfoutputfields); 410*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 411*0d0321e0SJeremy L Thompson CeedEvalMode emode; 412*0d0321e0SJeremy L Thompson CeedVector vec; 413*0d0321e0SJeremy L Thompson CeedBasis basis; 414*0d0321e0SJeremy L Thompson CeedElemRestriction Erestrict; 415*0d0321e0SJeremy L Thompson CeedScalar *edata[2*CEED_FIELD_MAX]; 416*0d0321e0SJeremy L Thompson 417*0d0321e0SJeremy L Thompson // Setup 418*0d0321e0SJeremy L Thompson ierr = CeedOperatorSetup_Hip(op); CeedChkBackend(ierr); 419*0d0321e0SJeremy L Thompson 420*0d0321e0SJeremy L Thompson // Input Evecs and Restriction 421*0d0321e0SJeremy L Thompson ierr = CeedOperatorSetupInputs_Hip(numinputfields, qfinputfields, 422*0d0321e0SJeremy L Thompson opinputfields, invec, false, edata, 423*0d0321e0SJeremy L Thompson impl, request); CeedChkBackend(ierr); 424*0d0321e0SJeremy L Thompson 425*0d0321e0SJeremy L Thompson // Input basis apply if needed 426*0d0321e0SJeremy L Thompson ierr = CeedOperatorInputBasis_Hip(numelements, qfinputfields, opinputfields, 427*0d0321e0SJeremy L Thompson numinputfields, false, edata, impl); 428*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 429*0d0321e0SJeremy L Thompson 430*0d0321e0SJeremy L Thompson // Output pointers, as necessary 431*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numoutputfields; i++) { 432*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 433*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 434*0d0321e0SJeremy L Thompson if (emode == CEED_EVAL_NONE) { 435*0d0321e0SJeremy L Thompson // Set the output Q-Vector to use the E-Vector data directly. 436*0d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayWrite(impl->evecs[i + impl->numein], CEED_MEM_DEVICE, 437*0d0321e0SJeremy L Thompson &edata[i + numinputfields]); CeedChkBackend(ierr); 438*0d0321e0SJeremy L Thompson ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_DEVICE, 439*0d0321e0SJeremy L Thompson CEED_USE_POINTER, edata[i + numinputfields]); 440*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 441*0d0321e0SJeremy L Thompson } 442*0d0321e0SJeremy L Thompson } 443*0d0321e0SJeremy L Thompson 444*0d0321e0SJeremy L Thompson // Q function 445*0d0321e0SJeremy L Thompson ierr = CeedQFunctionApply(qf, numelements * Q, impl->qvecsin, impl->qvecsout); 446*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 447*0d0321e0SJeremy L Thompson 448*0d0321e0SJeremy L Thompson // Output basis apply if needed 449*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numoutputfields; i++) { 450*0d0321e0SJeremy L Thompson // Get elemsize, emode, size 451*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 452*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 453*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 454*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 455*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 456*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 457*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 458*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 459*0d0321e0SJeremy L Thompson // Basis action 460*0d0321e0SJeremy L Thompson switch (emode) { 461*0d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 462*0d0321e0SJeremy L Thompson break; 463*0d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 464*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 465*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 466*0d0321e0SJeremy L Thompson ierr = CeedBasisApply(basis, numelements, CEED_TRANSPOSE, 467*0d0321e0SJeremy L Thompson CEED_EVAL_INTERP, impl->qvecsout[i], 468*0d0321e0SJeremy L Thompson impl->evecs[i + impl->numein]); CeedChkBackend(ierr); 469*0d0321e0SJeremy L Thompson break; 470*0d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 471*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 472*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 473*0d0321e0SJeremy L Thompson ierr = CeedBasisApply(basis, numelements, CEED_TRANSPOSE, 474*0d0321e0SJeremy L Thompson CEED_EVAL_GRAD, impl->qvecsout[i], 475*0d0321e0SJeremy L Thompson impl->evecs[i + impl->numein]); CeedChkBackend(ierr); 476*0d0321e0SJeremy L Thompson break; 477*0d0321e0SJeremy L Thompson // LCOV_EXCL_START 478*0d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 479*0d0321e0SJeremy L Thompson Ceed ceed; 480*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 481*0d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 482*0d0321e0SJeremy L Thompson "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); 483*0d0321e0SJeremy L Thompson break; // Should not occur 484*0d0321e0SJeremy L Thompson } 485*0d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 486*0d0321e0SJeremy L Thompson break; // TODO: Not implemented 487*0d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 488*0d0321e0SJeremy L Thompson break; // TODO: Not implemented 489*0d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 490*0d0321e0SJeremy L Thompson } 491*0d0321e0SJeremy L Thompson } 492*0d0321e0SJeremy L Thompson 493*0d0321e0SJeremy L Thompson // Output restriction 494*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numoutputfields; i++) { 495*0d0321e0SJeremy L Thompson // Restore evec 496*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 497*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 498*0d0321e0SJeremy L Thompson if (emode == CEED_EVAL_NONE) { 499*0d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 500*0d0321e0SJeremy L Thompson &edata[i + numinputfields]); 501*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 502*0d0321e0SJeremy L Thompson } 503*0d0321e0SJeremy L Thompson // Get output vector 504*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); 505*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 506*0d0321e0SJeremy L Thompson // Restrict 507*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 508*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 509*0d0321e0SJeremy L Thompson // Active 510*0d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) 511*0d0321e0SJeremy L Thompson vec = outvec; 512*0d0321e0SJeremy L Thompson 513*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionApply(Erestrict, CEED_TRANSPOSE, 514*0d0321e0SJeremy L Thompson impl->evecs[i + impl->numein], vec, 515*0d0321e0SJeremy L Thompson request); CeedChkBackend(ierr); 516*0d0321e0SJeremy L Thompson } 517*0d0321e0SJeremy L Thompson 518*0d0321e0SJeremy L Thompson // Restore input arrays 519*0d0321e0SJeremy L Thompson ierr = CeedOperatorRestoreInputs_Hip(numinputfields, qfinputfields, 520*0d0321e0SJeremy L Thompson opinputfields, false, edata, impl); 521*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 522*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 523*0d0321e0SJeremy L Thompson } 524*0d0321e0SJeremy L Thompson 525*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 526*0d0321e0SJeremy L Thompson // Core code for assembling linear QFunction 527*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 528*0d0321e0SJeremy L Thompson static inline int CeedOperatorLinearAssembleQFunctionCore_Hip(CeedOperator op, 529*0d0321e0SJeremy L Thompson bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr, 530*0d0321e0SJeremy L Thompson CeedRequest *request) { 531*0d0321e0SJeremy L Thompson int ierr; 532*0d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 533*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 534*0d0321e0SJeremy L Thompson CeedQFunction qf; 535*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 536*0d0321e0SJeremy L Thompson CeedInt Q, numelements, numinputfields, numoutputfields, size; 537*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 538*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetNumElements(op, &numelements); CeedChkBackend(ierr); 539*0d0321e0SJeremy L Thompson CeedOperatorField *opinputfields, *opoutputfields; 540*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetFields(op, &numinputfields, &opinputfields, 541*0d0321e0SJeremy L Thompson &numoutputfields, &opoutputfields); 542*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 543*0d0321e0SJeremy L Thompson CeedQFunctionField *qfinputfields, *qfoutputfields; 544*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, &qfinputfields, NULL, &qfoutputfields); 545*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 546*0d0321e0SJeremy L Thompson CeedVector vec; 547*0d0321e0SJeremy L Thompson CeedInt numactivein = impl->qfnumactivein, numactiveout = impl->qfnumactiveout; 548*0d0321e0SJeremy L Thompson CeedVector *activein = impl->qfactivein; 549*0d0321e0SJeremy L Thompson CeedScalar *a, *tmp; 550*0d0321e0SJeremy L Thompson Ceed ceed, ceedparent; 551*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 552*0d0321e0SJeremy L Thompson ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceedparent); 553*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 554*0d0321e0SJeremy L Thompson ceedparent = ceedparent ? ceedparent : ceed; 555*0d0321e0SJeremy L Thompson CeedScalar *edata[2*CEED_FIELD_MAX]; 556*0d0321e0SJeremy L Thompson 557*0d0321e0SJeremy L Thompson // Setup 558*0d0321e0SJeremy L Thompson ierr = CeedOperatorSetup_Hip(op); CeedChkBackend(ierr); 559*0d0321e0SJeremy L Thompson 560*0d0321e0SJeremy L Thompson // Check for identity 561*0d0321e0SJeremy L Thompson bool identityqf; 562*0d0321e0SJeremy L Thompson ierr = CeedQFunctionIsIdentity(qf, &identityqf); CeedChkBackend(ierr); 563*0d0321e0SJeremy L Thompson if (identityqf) 564*0d0321e0SJeremy L Thompson // LCOV_EXCL_START 565*0d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 566*0d0321e0SJeremy L Thompson "Assembling identity QFunctions not supported"); 567*0d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 568*0d0321e0SJeremy L Thompson 569*0d0321e0SJeremy L Thompson // Input Evecs and Restriction 570*0d0321e0SJeremy L Thompson ierr = CeedOperatorSetupInputs_Hip(numinputfields, qfinputfields, 571*0d0321e0SJeremy L Thompson opinputfields, NULL, true, edata, 572*0d0321e0SJeremy L Thompson impl, request); CeedChkBackend(ierr); 573*0d0321e0SJeremy L Thompson 574*0d0321e0SJeremy L Thompson // Count number of active input fields 575*0d0321e0SJeremy L Thompson if (!numactivein) { 576*0d0321e0SJeremy L Thompson for (CeedInt i=0; i<numinputfields; i++) { 577*0d0321e0SJeremy L Thompson // Get input vector 578*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 579*0d0321e0SJeremy L Thompson // Check if active input 580*0d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 581*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChkBackend(ierr); 582*0d0321e0SJeremy L Thompson ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChkBackend(ierr); 583*0d0321e0SJeremy L Thompson ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_DEVICE, &tmp); 584*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 585*0d0321e0SJeremy L Thompson ierr = CeedRealloc(numactivein + size, &activein); CeedChkBackend(ierr); 586*0d0321e0SJeremy L Thompson for (CeedInt field = 0; field < size; field++) { 587*0d0321e0SJeremy L Thompson ierr = CeedVectorCreate(ceed, Q*numelements, 588*0d0321e0SJeremy L Thompson &activein[numactivein+field]); CeedChkBackend(ierr); 589*0d0321e0SJeremy L Thompson ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_DEVICE, 590*0d0321e0SJeremy L Thompson CEED_USE_POINTER, &tmp[field*Q*numelements]); 591*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 592*0d0321e0SJeremy L Thompson } 593*0d0321e0SJeremy L Thompson numactivein += size; 594*0d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChkBackend(ierr); 595*0d0321e0SJeremy L Thompson } 596*0d0321e0SJeremy L Thompson } 597*0d0321e0SJeremy L Thompson impl->qfnumactivein = numactivein; 598*0d0321e0SJeremy L Thompson impl->qfactivein = activein; 599*0d0321e0SJeremy L Thompson } 600*0d0321e0SJeremy L Thompson 601*0d0321e0SJeremy L Thompson // Count number of active output fields 602*0d0321e0SJeremy L Thompson if (!numactiveout) { 603*0d0321e0SJeremy L Thompson for (CeedInt i=0; i<numoutputfields; i++) { 604*0d0321e0SJeremy L Thompson // Get output vector 605*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); 606*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 607*0d0321e0SJeremy L Thompson // Check if active output 608*0d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 609*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 610*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 611*0d0321e0SJeremy L Thompson numactiveout += size; 612*0d0321e0SJeremy L Thompson } 613*0d0321e0SJeremy L Thompson } 614*0d0321e0SJeremy L Thompson impl->qfnumactiveout = numactiveout; 615*0d0321e0SJeremy L Thompson } 616*0d0321e0SJeremy L Thompson 617*0d0321e0SJeremy L Thompson // Check sizes 618*0d0321e0SJeremy L Thompson if (!numactivein || !numactiveout) 619*0d0321e0SJeremy L Thompson // LCOV_EXCL_START 620*0d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 621*0d0321e0SJeremy L Thompson "Cannot assemble QFunction without active inputs " 622*0d0321e0SJeremy L Thompson "and outputs"); 623*0d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 624*0d0321e0SJeremy L Thompson 625*0d0321e0SJeremy L Thompson // Build objects if needed 626*0d0321e0SJeremy L Thompson if (build_objects) { 627*0d0321e0SJeremy L Thompson // Create output restriction 628*0d0321e0SJeremy L Thompson CeedInt strides[3] = {1, numelements*Q, Q}; /* *NOPAD* */ 629*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionCreateStrided(ceedparent, numelements, Q, 630*0d0321e0SJeremy L Thompson numactivein*numactiveout, 631*0d0321e0SJeremy L Thompson numactivein*numactiveout*numelements*Q, 632*0d0321e0SJeremy L Thompson strides, rstr); CeedChkBackend(ierr); 633*0d0321e0SJeremy L Thompson // Create assembled vector 634*0d0321e0SJeremy L Thompson ierr = CeedVectorCreate(ceedparent, numelements*Q*numactivein*numactiveout, 635*0d0321e0SJeremy L Thompson assembled); CeedChkBackend(ierr); 636*0d0321e0SJeremy L Thompson } 637*0d0321e0SJeremy L Thompson ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 638*0d0321e0SJeremy L Thompson ierr = CeedVectorGetArray(*assembled, CEED_MEM_DEVICE, &a); 639*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 640*0d0321e0SJeremy L Thompson 641*0d0321e0SJeremy L Thompson // Input basis apply 642*0d0321e0SJeremy L Thompson ierr = CeedOperatorInputBasis_Hip(numelements, qfinputfields, opinputfields, 643*0d0321e0SJeremy L Thompson numinputfields, true, edata, impl); 644*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 645*0d0321e0SJeremy L Thompson 646*0d0321e0SJeremy L Thompson // Assemble QFunction 647*0d0321e0SJeremy L Thompson for (CeedInt in=0; in<numactivein; in++) { 648*0d0321e0SJeremy L Thompson // Set Inputs 649*0d0321e0SJeremy L Thompson ierr = CeedVectorSetValue(activein[in], 1.0); CeedChkBackend(ierr); 650*0d0321e0SJeremy L Thompson if (numactivein > 1) { 651*0d0321e0SJeremy L Thompson ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 652*0d0321e0SJeremy L Thompson 0.0); CeedChkBackend(ierr); 653*0d0321e0SJeremy L Thompson } 654*0d0321e0SJeremy L Thompson // Set Outputs 655*0d0321e0SJeremy L Thompson for (CeedInt out=0; out<numoutputfields; out++) { 656*0d0321e0SJeremy L Thompson // Get output vector 657*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 658*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 659*0d0321e0SJeremy L Thompson // Check if active output 660*0d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 661*0d0321e0SJeremy L Thompson CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_DEVICE, 662*0d0321e0SJeremy L Thompson CEED_USE_POINTER, a); CeedChkBackend(ierr); 663*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 664*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 665*0d0321e0SJeremy L Thompson a += size*Q*numelements; // Advance the pointer by the size of the output 666*0d0321e0SJeremy L Thompson } 667*0d0321e0SJeremy L Thompson } 668*0d0321e0SJeremy L Thompson // Apply QFunction 669*0d0321e0SJeremy L Thompson ierr = CeedQFunctionApply(qf, Q*numelements, impl->qvecsin, impl->qvecsout); 670*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 671*0d0321e0SJeremy L Thompson } 672*0d0321e0SJeremy L Thompson 673*0d0321e0SJeremy L Thompson // Un-set output Qvecs to prevent accidental overwrite of Assembled 674*0d0321e0SJeremy L Thompson for (CeedInt out=0; out<numoutputfields; out++) { 675*0d0321e0SJeremy L Thompson // Get output vector 676*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 677*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 678*0d0321e0SJeremy L Thompson // Check if active output 679*0d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 680*0d0321e0SJeremy L Thompson ierr = CeedVectorTakeArray(impl->qvecsout[out], CEED_MEM_DEVICE, NULL); 681*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 682*0d0321e0SJeremy L Thompson } 683*0d0321e0SJeremy L Thompson } 684*0d0321e0SJeremy L Thompson 685*0d0321e0SJeremy L Thompson // Restore input arrays 686*0d0321e0SJeremy L Thompson ierr = CeedOperatorRestoreInputs_Hip(numinputfields, qfinputfields, 687*0d0321e0SJeremy L Thompson opinputfields, true, edata, impl); 688*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 689*0d0321e0SJeremy L Thompson 690*0d0321e0SJeremy L Thompson // Restore output 691*0d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(*assembled, &a); CeedChkBackend(ierr); 692*0d0321e0SJeremy L Thompson 693*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 694*0d0321e0SJeremy L Thompson } 695*0d0321e0SJeremy L Thompson 696*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 697*0d0321e0SJeremy L Thompson // Assemble Linear QFunction 698*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 699*0d0321e0SJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Hip(CeedOperator op, 700*0d0321e0SJeremy L Thompson CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 701*0d0321e0SJeremy L Thompson return CeedOperatorLinearAssembleQFunctionCore_Hip(op, true, assembled, rstr, 702*0d0321e0SJeremy L Thompson request); 703*0d0321e0SJeremy L Thompson } 704*0d0321e0SJeremy L Thompson 705*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 706*0d0321e0SJeremy L Thompson // Assemble Linear QFunction 707*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 708*0d0321e0SJeremy L Thompson static int CeedOperatorLinearAssembleQFunctionUpdate_Hip(CeedOperator op, 709*0d0321e0SJeremy L Thompson CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 710*0d0321e0SJeremy L Thompson return CeedOperatorLinearAssembleQFunctionCore_Hip(op, false, &assembled, &rstr, 711*0d0321e0SJeremy L Thompson request); 712*0d0321e0SJeremy L Thompson } 713*0d0321e0SJeremy L Thompson 714*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 715*0d0321e0SJeremy L Thompson // Diagonal assembly kernels 716*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 717*0d0321e0SJeremy L Thompson // *INDENT-OFF* 718*0d0321e0SJeremy L Thompson static const char *diagonalkernels = QUOTE( 719*0d0321e0SJeremy L Thompson 720*0d0321e0SJeremy L Thompson typedef enum { 721*0d0321e0SJeremy L Thompson /// Perform no evaluation (either because there is no data or it is already at 722*0d0321e0SJeremy L Thompson /// quadrature points) 723*0d0321e0SJeremy L Thompson CEED_EVAL_NONE = 0, 724*0d0321e0SJeremy L Thompson /// Interpolate from nodes to quadrature points 725*0d0321e0SJeremy L Thompson CEED_EVAL_INTERP = 1, 726*0d0321e0SJeremy L Thompson /// Evaluate gradients at quadrature points from input in a nodal basis 727*0d0321e0SJeremy L Thompson CEED_EVAL_GRAD = 2, 728*0d0321e0SJeremy L Thompson /// Evaluate divergence at quadrature points from input in a nodal basis 729*0d0321e0SJeremy L Thompson CEED_EVAL_DIV = 4, 730*0d0321e0SJeremy L Thompson /// Evaluate curl at quadrature points from input in a nodal basis 731*0d0321e0SJeremy L Thompson CEED_EVAL_CURL = 8, 732*0d0321e0SJeremy L Thompson /// Using no input, evaluate quadrature weights on the reference element 733*0d0321e0SJeremy L Thompson CEED_EVAL_WEIGHT = 16, 734*0d0321e0SJeremy L Thompson } CeedEvalMode; 735*0d0321e0SJeremy L Thompson 736*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 737*0d0321e0SJeremy L Thompson // Get Basis Emode Pointer 738*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 739*0d0321e0SJeremy L Thompson extern "C" __device__ void CeedOperatorGetBasisPointer_Hip(const CeedScalar **basisptr, 740*0d0321e0SJeremy L Thompson CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp, 741*0d0321e0SJeremy L Thompson const CeedScalar *grad) { 742*0d0321e0SJeremy L Thompson switch (emode) { 743*0d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 744*0d0321e0SJeremy L Thompson *basisptr = identity; 745*0d0321e0SJeremy L Thompson break; 746*0d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 747*0d0321e0SJeremy L Thompson *basisptr = interp; 748*0d0321e0SJeremy L Thompson break; 749*0d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 750*0d0321e0SJeremy L Thompson *basisptr = grad; 751*0d0321e0SJeremy L Thompson break; 752*0d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: 753*0d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 754*0d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 755*0d0321e0SJeremy L Thompson break; // Caught by QF Assembly 756*0d0321e0SJeremy L Thompson } 757*0d0321e0SJeremy L Thompson } 758*0d0321e0SJeremy L Thompson 759*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 760*0d0321e0SJeremy L Thompson // Core code for diagonal assembly 761*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 762*0d0321e0SJeremy L Thompson __device__ void diagonalCore(const CeedInt nelem, 763*0d0321e0SJeremy L Thompson const CeedScalar maxnorm, const bool pointBlock, 764*0d0321e0SJeremy L Thompson const CeedScalar *identity, 765*0d0321e0SJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 766*0d0321e0SJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 767*0d0321e0SJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 768*0d0321e0SJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 769*0d0321e0SJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 770*0d0321e0SJeremy L Thompson const int tid = threadIdx.x; // running with P threads, tid is evec node 771*0d0321e0SJeremy L Thompson const CeedScalar qfvaluebound = maxnorm*1e-12; 772*0d0321e0SJeremy L Thompson 773*0d0321e0SJeremy L Thompson // Compute the diagonal of B^T D B 774*0d0321e0SJeremy L Thompson // Each element 775*0d0321e0SJeremy L Thompson for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem; 776*0d0321e0SJeremy L Thompson e += gridDim.x*blockDim.z) { 777*0d0321e0SJeremy L Thompson CeedInt dout = -1; 778*0d0321e0SJeremy L Thompson // Each basis eval mode pair 779*0d0321e0SJeremy L Thompson for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) { 780*0d0321e0SJeremy L Thompson const CeedScalar *bt = NULL; 781*0d0321e0SJeremy L Thompson if (emodeout[eout] == CEED_EVAL_GRAD) 782*0d0321e0SJeremy L Thompson dout += 1; 783*0d0321e0SJeremy L Thompson CeedOperatorGetBasisPointer_Hip(&bt, emodeout[eout], identity, interpout, 784*0d0321e0SJeremy L Thompson &gradout[dout*NQPTS*NNODES]); 785*0d0321e0SJeremy L Thompson CeedInt din = -1; 786*0d0321e0SJeremy L Thompson for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) { 787*0d0321e0SJeremy L Thompson const CeedScalar *b = NULL; 788*0d0321e0SJeremy L Thompson if (emodein[ein] == CEED_EVAL_GRAD) 789*0d0321e0SJeremy L Thompson din += 1; 790*0d0321e0SJeremy L Thompson CeedOperatorGetBasisPointer_Hip(&b, emodein[ein], identity, interpin, 791*0d0321e0SJeremy L Thompson &gradin[din*NQPTS*NNODES]); 792*0d0321e0SJeremy L Thompson // Each component 793*0d0321e0SJeremy L Thompson for (CeedInt compOut = 0; compOut < NCOMP; compOut++) { 794*0d0321e0SJeremy L Thompson // Each qpoint/node pair 795*0d0321e0SJeremy L Thompson if (pointBlock) { 796*0d0321e0SJeremy L Thompson // Point Block Diagonal 797*0d0321e0SJeremy L Thompson for (CeedInt compIn = 0; compIn < NCOMP; compIn++) { 798*0d0321e0SJeremy L Thompson CeedScalar evalue = 0.; 799*0d0321e0SJeremy L Thompson for (CeedInt q = 0; q < NQPTS; q++) { 800*0d0321e0SJeremy L Thompson const CeedScalar qfvalue = 801*0d0321e0SJeremy L Thompson assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)* 802*0d0321e0SJeremy L Thompson NCOMP+compOut)*nelem+e)*NQPTS+q]; 803*0d0321e0SJeremy L Thompson if (abs(qfvalue) > qfvaluebound) 804*0d0321e0SJeremy L Thompson evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; 805*0d0321e0SJeremy L Thompson } 806*0d0321e0SJeremy L Thompson elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue; 807*0d0321e0SJeremy L Thompson } 808*0d0321e0SJeremy L Thompson } else { 809*0d0321e0SJeremy L Thompson // Diagonal Only 810*0d0321e0SJeremy L Thompson CeedScalar evalue = 0.; 811*0d0321e0SJeremy L Thompson for (CeedInt q = 0; q < NQPTS; q++) { 812*0d0321e0SJeremy L Thompson const CeedScalar qfvalue = 813*0d0321e0SJeremy L Thompson assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)* 814*0d0321e0SJeremy L Thompson NCOMP+compOut)*nelem+e)*NQPTS+q]; 815*0d0321e0SJeremy L Thompson if (abs(qfvalue) > qfvaluebound) 816*0d0321e0SJeremy L Thompson evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; 817*0d0321e0SJeremy L Thompson } 818*0d0321e0SJeremy L Thompson elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue; 819*0d0321e0SJeremy L Thompson } 820*0d0321e0SJeremy L Thompson } 821*0d0321e0SJeremy L Thompson } 822*0d0321e0SJeremy L Thompson } 823*0d0321e0SJeremy L Thompson } 824*0d0321e0SJeremy L Thompson } 825*0d0321e0SJeremy L Thompson 826*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 827*0d0321e0SJeremy L Thompson // Linear diagonal 828*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 829*0d0321e0SJeremy L Thompson extern "C" __global__ void linearDiagonal(const CeedInt nelem, 830*0d0321e0SJeremy L Thompson const CeedScalar maxnorm, const CeedScalar *identity, 831*0d0321e0SJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 832*0d0321e0SJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 833*0d0321e0SJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 834*0d0321e0SJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 835*0d0321e0SJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 836*0d0321e0SJeremy L Thompson diagonalCore(nelem, maxnorm, false, identity, interpin, gradin, interpout, 837*0d0321e0SJeremy L Thompson gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 838*0d0321e0SJeremy L Thompson } 839*0d0321e0SJeremy L Thompson 840*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 841*0d0321e0SJeremy L Thompson // Linear point block diagonal 842*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 843*0d0321e0SJeremy L Thompson extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, 844*0d0321e0SJeremy L Thompson const CeedScalar maxnorm, const CeedScalar *identity, 845*0d0321e0SJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 846*0d0321e0SJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 847*0d0321e0SJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 848*0d0321e0SJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 849*0d0321e0SJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 850*0d0321e0SJeremy L Thompson diagonalCore(nelem, maxnorm, true, identity, interpin, gradin, interpout, 851*0d0321e0SJeremy L Thompson gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 852*0d0321e0SJeremy L Thompson } 853*0d0321e0SJeremy L Thompson 854*0d0321e0SJeremy L Thompson ); 855*0d0321e0SJeremy L Thompson // *INDENT-ON* 856*0d0321e0SJeremy L Thompson 857*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 858*0d0321e0SJeremy L Thompson // Create point block restriction 859*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 860*0d0321e0SJeremy L Thompson static int CreatePBRestriction(CeedElemRestriction rstr, 861*0d0321e0SJeremy L Thompson CeedElemRestriction *pbRstr) { 862*0d0321e0SJeremy L Thompson int ierr; 863*0d0321e0SJeremy L Thompson Ceed ceed; 864*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr); 865*0d0321e0SJeremy L Thompson const CeedInt *offsets; 866*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 867*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 868*0d0321e0SJeremy L Thompson 869*0d0321e0SJeremy L Thompson // Expand offsets 870*0d0321e0SJeremy L Thompson CeedInt nelem, ncomp, elemsize, compstride, max = 1, *pbOffsets; 871*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionGetNumElements(rstr, &nelem); CeedChkBackend(ierr); 872*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionGetNumComponents(rstr, &ncomp); CeedChkBackend(ierr); 873*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionGetElementSize(rstr, &elemsize); CeedChkBackend(ierr); 874*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionGetCompStride(rstr, &compstride); 875*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 876*0d0321e0SJeremy L Thompson CeedInt shift = ncomp; 877*0d0321e0SJeremy L Thompson if (compstride != 1) 878*0d0321e0SJeremy L Thompson shift *= ncomp; 879*0d0321e0SJeremy L Thompson ierr = CeedCalloc(nelem*elemsize, &pbOffsets); CeedChkBackend(ierr); 880*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < nelem*elemsize; i++) { 881*0d0321e0SJeremy L Thompson pbOffsets[i] = offsets[i]*shift; 882*0d0321e0SJeremy L Thompson if (pbOffsets[i] > max) 883*0d0321e0SJeremy L Thompson max = pbOffsets[i]; 884*0d0321e0SJeremy L Thompson } 885*0d0321e0SJeremy L Thompson 886*0d0321e0SJeremy L Thompson // Create new restriction 887*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionCreate(ceed, nelem, elemsize, ncomp*ncomp, 1, 888*0d0321e0SJeremy L Thompson max + ncomp*ncomp, CEED_MEM_HOST, 889*0d0321e0SJeremy L Thompson CEED_OWN_POINTER, pbOffsets, pbRstr); 890*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 891*0d0321e0SJeremy L Thompson 892*0d0321e0SJeremy L Thompson // Cleanup 893*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChkBackend(ierr); 894*0d0321e0SJeremy L Thompson 895*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 896*0d0321e0SJeremy L Thompson } 897*0d0321e0SJeremy L Thompson 898*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 899*0d0321e0SJeremy L Thompson // Assemble diagonal setup 900*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 901*0d0321e0SJeremy L Thompson static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op, 902*0d0321e0SJeremy L Thompson const bool pointBlock) { 903*0d0321e0SJeremy L Thompson int ierr; 904*0d0321e0SJeremy L Thompson Ceed ceed; 905*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 906*0d0321e0SJeremy L Thompson CeedQFunction qf; 907*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 908*0d0321e0SJeremy L Thompson CeedInt numinputfields, numoutputfields; 909*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 910*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 911*0d0321e0SJeremy L Thompson 912*0d0321e0SJeremy L Thompson // Determine active input basis 913*0d0321e0SJeremy L Thompson CeedOperatorField *opfields; 914*0d0321e0SJeremy L Thompson CeedQFunctionField *qffields; 915*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetFields(op, NULL, &opfields, NULL, NULL); 916*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 917*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, &qffields, NULL, NULL); 918*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 919*0d0321e0SJeremy L Thompson CeedInt numemodein = 0, ncomp = 0, dim = 1; 920*0d0321e0SJeremy L Thompson CeedEvalMode *emodein = NULL; 921*0d0321e0SJeremy L Thompson CeedBasis basisin = NULL; 922*0d0321e0SJeremy L Thompson CeedElemRestriction rstrin = NULL; 923*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numinputfields; i++) { 924*0d0321e0SJeremy L Thompson CeedVector vec; 925*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opfields[i], &vec); CeedChkBackend(ierr); 926*0d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 927*0d0321e0SJeremy L Thompson CeedElemRestriction rstr; 928*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetBasis(opfields[i], &basisin); CeedChkBackend(ierr); 929*0d0321e0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basisin, &ncomp); CeedChkBackend(ierr); 930*0d0321e0SJeremy L Thompson ierr = CeedBasisGetDimension(basisin, &dim); CeedChkBackend(ierr); 931*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &rstr); 932*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 933*0d0321e0SJeremy L Thompson if (rstrin && rstrin != rstr) 934*0d0321e0SJeremy L Thompson // LCOV_EXCL_START 935*0d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 936*0d0321e0SJeremy L Thompson "Multi-field non-composite operator diagonal assembly not supported"); 937*0d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 938*0d0321e0SJeremy L Thompson rstrin = rstr; 939*0d0321e0SJeremy L Thompson CeedEvalMode emode; 940*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); 941*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 942*0d0321e0SJeremy L Thompson switch (emode) { 943*0d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 944*0d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 945*0d0321e0SJeremy L Thompson ierr = CeedRealloc(numemodein + 1, &emodein); CeedChkBackend(ierr); 946*0d0321e0SJeremy L Thompson emodein[numemodein] = emode; 947*0d0321e0SJeremy L Thompson numemodein += 1; 948*0d0321e0SJeremy L Thompson break; 949*0d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 950*0d0321e0SJeremy L Thompson ierr = CeedRealloc(numemodein + dim, &emodein); CeedChkBackend(ierr); 951*0d0321e0SJeremy L Thompson for (CeedInt d = 0; d < dim; d++) 952*0d0321e0SJeremy L Thompson emodein[numemodein+d] = emode; 953*0d0321e0SJeremy L Thompson numemodein += dim; 954*0d0321e0SJeremy L Thompson break; 955*0d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: 956*0d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 957*0d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 958*0d0321e0SJeremy L Thompson break; // Caught by QF Assembly 959*0d0321e0SJeremy L Thompson } 960*0d0321e0SJeremy L Thompson } 961*0d0321e0SJeremy L Thompson } 962*0d0321e0SJeremy L Thompson 963*0d0321e0SJeremy L Thompson // Determine active output basis 964*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &opfields); 965*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 966*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qffields); 967*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 968*0d0321e0SJeremy L Thompson CeedInt numemodeout = 0; 969*0d0321e0SJeremy L Thompson CeedEvalMode *emodeout = NULL; 970*0d0321e0SJeremy L Thompson CeedBasis basisout = NULL; 971*0d0321e0SJeremy L Thompson CeedElemRestriction rstrout = NULL; 972*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numoutputfields; i++) { 973*0d0321e0SJeremy L Thompson CeedVector vec; 974*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opfields[i], &vec); CeedChkBackend(ierr); 975*0d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 976*0d0321e0SJeremy L Thompson CeedElemRestriction rstr; 977*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetBasis(opfields[i], &basisout); CeedChkBackend(ierr); 978*0d0321e0SJeremy L Thompson ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &rstr); 979*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 980*0d0321e0SJeremy L Thompson if (rstrout && rstrout != rstr) 981*0d0321e0SJeremy L Thompson // LCOV_EXCL_START 982*0d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 983*0d0321e0SJeremy L Thompson "Multi-field non-composite operator diagonal assembly not supported"); 984*0d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 985*0d0321e0SJeremy L Thompson rstrout = rstr; 986*0d0321e0SJeremy L Thompson CeedEvalMode emode; 987*0d0321e0SJeremy L Thompson ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChkBackend(ierr); 988*0d0321e0SJeremy L Thompson switch (emode) { 989*0d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 990*0d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 991*0d0321e0SJeremy L Thompson ierr = CeedRealloc(numemodeout + 1, &emodeout); CeedChkBackend(ierr); 992*0d0321e0SJeremy L Thompson emodeout[numemodeout] = emode; 993*0d0321e0SJeremy L Thompson numemodeout += 1; 994*0d0321e0SJeremy L Thompson break; 995*0d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 996*0d0321e0SJeremy L Thompson ierr = CeedRealloc(numemodeout + dim, &emodeout); CeedChkBackend(ierr); 997*0d0321e0SJeremy L Thompson for (CeedInt d = 0; d < dim; d++) 998*0d0321e0SJeremy L Thompson emodeout[numemodeout+d] = emode; 999*0d0321e0SJeremy L Thompson numemodeout += dim; 1000*0d0321e0SJeremy L Thompson break; 1001*0d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: 1002*0d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 1003*0d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 1004*0d0321e0SJeremy L Thompson break; // Caught by QF Assembly 1005*0d0321e0SJeremy L Thompson } 1006*0d0321e0SJeremy L Thompson } 1007*0d0321e0SJeremy L Thompson } 1008*0d0321e0SJeremy L Thompson 1009*0d0321e0SJeremy L Thompson // Operator data struct 1010*0d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 1011*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 1012*0d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &impl->diag); CeedChkBackend(ierr); 1013*0d0321e0SJeremy L Thompson CeedOperatorDiag_Hip *diag = impl->diag; 1014*0d0321e0SJeremy L Thompson diag->basisin = basisin; 1015*0d0321e0SJeremy L Thompson diag->basisout = basisout; 1016*0d0321e0SJeremy L Thompson diag->h_emodein = emodein; 1017*0d0321e0SJeremy L Thompson diag->h_emodeout = emodeout; 1018*0d0321e0SJeremy L Thompson diag->numemodein = numemodein; 1019*0d0321e0SJeremy L Thompson diag->numemodeout = numemodeout; 1020*0d0321e0SJeremy L Thompson 1021*0d0321e0SJeremy L Thompson // Assemble kernel 1022*0d0321e0SJeremy L Thompson CeedInt nnodes, nqpts; 1023*0d0321e0SJeremy L Thompson ierr = CeedBasisGetNumNodes(basisin, &nnodes); CeedChkBackend(ierr); 1024*0d0321e0SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints(basisin, &nqpts); CeedChkBackend(ierr); 1025*0d0321e0SJeremy L Thompson diag->nnodes = nnodes; 1026*0d0321e0SJeremy L Thompson ierr = CeedCompileHip(ceed, diagonalkernels, &diag->module, 5, 1027*0d0321e0SJeremy L Thompson "NUMEMODEIN", numemodein, 1028*0d0321e0SJeremy L Thompson "NUMEMODEOUT", numemodeout, 1029*0d0321e0SJeremy L Thompson "NNODES", nnodes, 1030*0d0321e0SJeremy L Thompson "NQPTS", nqpts, 1031*0d0321e0SJeremy L Thompson "NCOMP", ncomp 1032*0d0321e0SJeremy L Thompson ); CeedChk_Hip(ceed, ierr); 1033*0d0321e0SJeremy L Thompson ierr = CeedGetKernelHip(ceed, diag->module, "linearDiagonal", 1034*0d0321e0SJeremy L Thompson &diag->linearDiagonal); CeedChk_Hip(ceed, ierr); 1035*0d0321e0SJeremy L Thompson ierr = CeedGetKernelHip(ceed, diag->module, "linearPointBlockDiagonal", 1036*0d0321e0SJeremy L Thompson &diag->linearPointBlock); 1037*0d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 1038*0d0321e0SJeremy L Thompson 1039*0d0321e0SJeremy L Thompson // Basis matrices 1040*0d0321e0SJeremy L Thompson const CeedInt qBytes = nqpts * sizeof(CeedScalar); 1041*0d0321e0SJeremy L Thompson const CeedInt iBytes = qBytes * nnodes; 1042*0d0321e0SJeremy L Thompson const CeedInt gBytes = qBytes * nnodes * dim; 1043*0d0321e0SJeremy L Thompson const CeedInt eBytes = sizeof(CeedEvalMode); 1044*0d0321e0SJeremy L Thompson const CeedScalar *interpin, *interpout, *gradin, *gradout; 1045*0d0321e0SJeremy L Thompson 1046*0d0321e0SJeremy L Thompson // CEED_EVAL_NONE 1047*0d0321e0SJeremy L Thompson CeedScalar *identity = NULL; 1048*0d0321e0SJeremy L Thompson bool evalNone = false; 1049*0d0321e0SJeremy L Thompson for (CeedInt i=0; i<numemodein; i++) 1050*0d0321e0SJeremy L Thompson evalNone = evalNone || (emodein[i] == CEED_EVAL_NONE); 1051*0d0321e0SJeremy L Thompson for (CeedInt i=0; i<numemodeout; i++) 1052*0d0321e0SJeremy L Thompson evalNone = evalNone || (emodeout[i] == CEED_EVAL_NONE); 1053*0d0321e0SJeremy L Thompson if (evalNone) { 1054*0d0321e0SJeremy L Thompson ierr = CeedCalloc(nqpts*nnodes, &identity); CeedChkBackend(ierr); 1055*0d0321e0SJeremy L Thompson for (CeedInt i=0; i<(nnodes<nqpts?nnodes:nqpts); i++) 1056*0d0321e0SJeremy L Thompson identity[i*nnodes+i] = 1.0; 1057*0d0321e0SJeremy L Thompson ierr = hipMalloc((void **)&diag->d_identity, iBytes); CeedChk_Hip(ceed, ierr); 1058*0d0321e0SJeremy L Thompson ierr = hipMemcpy(diag->d_identity, identity, iBytes, 1059*0d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 1060*0d0321e0SJeremy L Thompson } 1061*0d0321e0SJeremy L Thompson 1062*0d0321e0SJeremy L Thompson // CEED_EVAL_INTERP 1063*0d0321e0SJeremy L Thompson ierr = CeedBasisGetInterp(basisin, &interpin); CeedChkBackend(ierr); 1064*0d0321e0SJeremy L Thompson ierr = hipMalloc((void **)&diag->d_interpin, iBytes); CeedChk_Hip(ceed, ierr); 1065*0d0321e0SJeremy L Thompson ierr = hipMemcpy(diag->d_interpin, interpin, iBytes, 1066*0d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 1067*0d0321e0SJeremy L Thompson ierr = CeedBasisGetInterp(basisout, &interpout); CeedChkBackend(ierr); 1068*0d0321e0SJeremy L Thompson ierr = hipMalloc((void **)&diag->d_interpout, iBytes); CeedChk_Hip(ceed, ierr); 1069*0d0321e0SJeremy L Thompson ierr = hipMemcpy(diag->d_interpout, interpout, iBytes, 1070*0d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 1071*0d0321e0SJeremy L Thompson 1072*0d0321e0SJeremy L Thompson // CEED_EVAL_GRAD 1073*0d0321e0SJeremy L Thompson ierr = CeedBasisGetGrad(basisin, &gradin); CeedChkBackend(ierr); 1074*0d0321e0SJeremy L Thompson ierr = hipMalloc((void **)&diag->d_gradin, gBytes); CeedChk_Hip(ceed, ierr); 1075*0d0321e0SJeremy L Thompson ierr = hipMemcpy(diag->d_gradin, gradin, gBytes, 1076*0d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 1077*0d0321e0SJeremy L Thompson ierr = CeedBasisGetGrad(basisout, &gradout); CeedChkBackend(ierr); 1078*0d0321e0SJeremy L Thompson ierr = hipMalloc((void **)&diag->d_gradout, gBytes); CeedChk_Hip(ceed, ierr); 1079*0d0321e0SJeremy L Thompson ierr = hipMemcpy(diag->d_gradout, gradout, gBytes, 1080*0d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 1081*0d0321e0SJeremy L Thompson 1082*0d0321e0SJeremy L Thompson // Arrays of emodes 1083*0d0321e0SJeremy L Thompson ierr = hipMalloc((void **)&diag->d_emodein, numemodein * eBytes); 1084*0d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 1085*0d0321e0SJeremy L Thompson ierr = hipMemcpy(diag->d_emodein, emodein, numemodein * eBytes, 1086*0d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 1087*0d0321e0SJeremy L Thompson ierr = hipMalloc((void **)&diag->d_emodeout, numemodeout * eBytes); 1088*0d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 1089*0d0321e0SJeremy L Thompson ierr = hipMemcpy(diag->d_emodeout, emodeout, numemodeout * eBytes, 1090*0d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 1091*0d0321e0SJeremy L Thompson 1092*0d0321e0SJeremy L Thompson // Restriction 1093*0d0321e0SJeremy L Thompson diag->diagrstr = rstrout; 1094*0d0321e0SJeremy L Thompson 1095*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1096*0d0321e0SJeremy L Thompson } 1097*0d0321e0SJeremy L Thompson 1098*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1099*0d0321e0SJeremy L Thompson // Assemble diagonal common code 1100*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1101*0d0321e0SJeremy L Thompson static inline int CeedOperatorAssembleDiagonalCore_Hip(CeedOperator op, 1102*0d0321e0SJeremy L Thompson CeedVector assembled, CeedRequest *request, const bool pointBlock) { 1103*0d0321e0SJeremy L Thompson int ierr; 1104*0d0321e0SJeremy L Thompson Ceed ceed; 1105*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 1106*0d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 1107*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 1108*0d0321e0SJeremy L Thompson 1109*0d0321e0SJeremy L Thompson // Assemble QFunction 1110*0d0321e0SJeremy L Thompson CeedVector assembledqf; 1111*0d0321e0SJeremy L Thompson CeedElemRestriction rstr; 1112*0d0321e0SJeremy L Thompson ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembledqf, 1113*0d0321e0SJeremy L Thompson &rstr, request); CeedChkBackend(ierr); 1114*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionDestroy(&rstr); CeedChkBackend(ierr); 1115*0d0321e0SJeremy L Thompson CeedScalar maxnorm = 0; 1116*0d0321e0SJeremy L Thompson ierr = CeedVectorNorm(assembledqf, CEED_NORM_MAX, &maxnorm); 1117*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1118*0d0321e0SJeremy L Thompson 1119*0d0321e0SJeremy L Thompson // Setup 1120*0d0321e0SJeremy L Thompson if (!impl->diag) { 1121*0d0321e0SJeremy L Thompson ierr = CeedOperatorAssembleDiagonalSetup_Hip(op, pointBlock); 1122*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1123*0d0321e0SJeremy L Thompson } 1124*0d0321e0SJeremy L Thompson CeedOperatorDiag_Hip *diag = impl->diag; 1125*0d0321e0SJeremy L Thompson assert(diag != NULL); 1126*0d0321e0SJeremy L Thompson 1127*0d0321e0SJeremy L Thompson // Restriction 1128*0d0321e0SJeremy L Thompson if (pointBlock && !diag->pbdiagrstr) { 1129*0d0321e0SJeremy L Thompson CeedElemRestriction pbdiagrstr; 1130*0d0321e0SJeremy L Thompson ierr = CreatePBRestriction(diag->diagrstr, &pbdiagrstr); CeedChkBackend(ierr); 1131*0d0321e0SJeremy L Thompson diag->pbdiagrstr = pbdiagrstr; 1132*0d0321e0SJeremy L Thompson } 1133*0d0321e0SJeremy L Thompson CeedElemRestriction diagrstr = pointBlock ? diag->pbdiagrstr : diag->diagrstr; 1134*0d0321e0SJeremy L Thompson 1135*0d0321e0SJeremy L Thompson // Create diagonal vector 1136*0d0321e0SJeremy L Thompson CeedVector elemdiag = pointBlock ? diag->pbelemdiag : diag->elemdiag; 1137*0d0321e0SJeremy L Thompson if (!elemdiag) { 1138*0d0321e0SJeremy L Thompson // Element diagonal vector 1139*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionCreateVector(diagrstr, NULL, &elemdiag); 1140*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1141*0d0321e0SJeremy L Thompson if (pointBlock) 1142*0d0321e0SJeremy L Thompson diag->pbelemdiag = elemdiag; 1143*0d0321e0SJeremy L Thompson else 1144*0d0321e0SJeremy L Thompson diag->elemdiag = elemdiag; 1145*0d0321e0SJeremy L Thompson } 1146*0d0321e0SJeremy L Thompson ierr = CeedVectorSetValue(elemdiag, 0.0); CeedChkBackend(ierr); 1147*0d0321e0SJeremy L Thompson 1148*0d0321e0SJeremy L Thompson // Assemble element operator diagonals 1149*0d0321e0SJeremy L Thompson CeedScalar *elemdiagarray; 1150*0d0321e0SJeremy L Thompson const CeedScalar *assembledqfarray; 1151*0d0321e0SJeremy L Thompson ierr = CeedVectorGetArray(elemdiag, CEED_MEM_DEVICE, &elemdiagarray); 1152*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1153*0d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayRead(assembledqf, CEED_MEM_DEVICE, &assembledqfarray); 1154*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1155*0d0321e0SJeremy L Thompson CeedInt nelem; 1156*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionGetNumElements(diagrstr, &nelem); 1157*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1158*0d0321e0SJeremy L Thompson 1159*0d0321e0SJeremy L Thompson // Compute the diagonal of B^T D B 1160*0d0321e0SJeremy L Thompson int elemsPerBlock = 1; 1161*0d0321e0SJeremy L Thompson int grid = nelem/elemsPerBlock+((nelem/elemsPerBlock*elemsPerBlock<nelem)?1:0); 1162*0d0321e0SJeremy L Thompson void *args[] = {(void *) &nelem, (void *) &maxnorm, &diag->d_identity, 1163*0d0321e0SJeremy L Thompson &diag->d_interpin, &diag->d_gradin, &diag->d_interpout, 1164*0d0321e0SJeremy L Thompson &diag->d_gradout, &diag->d_emodein, &diag->d_emodeout, 1165*0d0321e0SJeremy L Thompson &assembledqfarray, &elemdiagarray 1166*0d0321e0SJeremy L Thompson }; 1167*0d0321e0SJeremy L Thompson if (pointBlock) { 1168*0d0321e0SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, diag->linearPointBlock, grid, 1169*0d0321e0SJeremy L Thompson diag->nnodes, 1, elemsPerBlock, args); 1170*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1171*0d0321e0SJeremy L Thompson } else { 1172*0d0321e0SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, diag->linearDiagonal, grid, 1173*0d0321e0SJeremy L Thompson diag->nnodes, 1, elemsPerBlock, args); 1174*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1175*0d0321e0SJeremy L Thompson } 1176*0d0321e0SJeremy L Thompson 1177*0d0321e0SJeremy L Thompson // Restore arrays 1178*0d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(elemdiag, &elemdiagarray); CeedChkBackend(ierr); 1179*0d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(assembledqf, &assembledqfarray); 1180*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1181*0d0321e0SJeremy L Thompson 1182*0d0321e0SJeremy L Thompson // Assemble local operator diagonal 1183*0d0321e0SJeremy L Thompson ierr = CeedElemRestrictionApply(diagrstr, CEED_TRANSPOSE, elemdiag, 1184*0d0321e0SJeremy L Thompson assembled, request); CeedChkBackend(ierr); 1185*0d0321e0SJeremy L Thompson 1186*0d0321e0SJeremy L Thompson // Cleanup 1187*0d0321e0SJeremy L Thompson ierr = CeedVectorDestroy(&assembledqf); CeedChkBackend(ierr); 1188*0d0321e0SJeremy L Thompson 1189*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1190*0d0321e0SJeremy L Thompson } 1191*0d0321e0SJeremy L Thompson 1192*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1193*0d0321e0SJeremy L Thompson // Assemble composite diagonal common code 1194*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1195*0d0321e0SJeremy L Thompson static inline int CeedOperatorLinearAssembleAddDiagonalCompositeCore_Hip( 1196*0d0321e0SJeremy L Thompson CeedOperator op, CeedVector assembled, CeedRequest *request, 1197*0d0321e0SJeremy L Thompson const bool pointBlock) { 1198*0d0321e0SJeremy L Thompson int ierr; 1199*0d0321e0SJeremy L Thompson CeedInt numSub; 1200*0d0321e0SJeremy L Thompson CeedOperator *subOperators; 1201*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetNumSub(op, &numSub); CeedChkBackend(ierr); 1202*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetSubList(op, &subOperators); CeedChkBackend(ierr); 1203*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numSub; i++) { 1204*0d0321e0SJeremy L Thompson ierr = CeedOperatorAssembleDiagonalCore_Hip(subOperators[i], assembled, 1205*0d0321e0SJeremy L Thompson request, pointBlock); CeedChkBackend(ierr); 1206*0d0321e0SJeremy L Thompson } 1207*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1208*0d0321e0SJeremy L Thompson } 1209*0d0321e0SJeremy L Thompson 1210*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1211*0d0321e0SJeremy L Thompson // Assemble Linear Diagonal 1212*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1213*0d0321e0SJeremy L Thompson static int CeedOperatorLinearAssembleAddDiagonal_Hip(CeedOperator op, 1214*0d0321e0SJeremy L Thompson CeedVector assembled, CeedRequest *request) { 1215*0d0321e0SJeremy L Thompson int ierr; 1216*0d0321e0SJeremy L Thompson bool isComposite; 1217*0d0321e0SJeremy L Thompson ierr = CeedOperatorIsComposite(op, &isComposite); CeedChkBackend(ierr); 1218*0d0321e0SJeremy L Thompson if (isComposite) { 1219*0d0321e0SJeremy L Thompson return CeedOperatorLinearAssembleAddDiagonalCompositeCore_Hip(op, assembled, 1220*0d0321e0SJeremy L Thompson request, false); 1221*0d0321e0SJeremy L Thompson } else { 1222*0d0321e0SJeremy L Thompson return CeedOperatorAssembleDiagonalCore_Hip(op, assembled, request, false); 1223*0d0321e0SJeremy L Thompson } 1224*0d0321e0SJeremy L Thompson } 1225*0d0321e0SJeremy L Thompson 1226*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1227*0d0321e0SJeremy L Thompson // Assemble Linear Point Block Diagonal 1228*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1229*0d0321e0SJeremy L Thompson static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip(CeedOperator op, 1230*0d0321e0SJeremy L Thompson CeedVector assembled, CeedRequest *request) { 1231*0d0321e0SJeremy L Thompson int ierr; 1232*0d0321e0SJeremy L Thompson bool isComposite; 1233*0d0321e0SJeremy L Thompson ierr = CeedOperatorIsComposite(op, &isComposite); CeedChkBackend(ierr); 1234*0d0321e0SJeremy L Thompson if (isComposite) { 1235*0d0321e0SJeremy L Thompson return CeedOperatorLinearAssembleAddDiagonalCompositeCore_Hip(op, assembled, 1236*0d0321e0SJeremy L Thompson request, true); 1237*0d0321e0SJeremy L Thompson } else { 1238*0d0321e0SJeremy L Thompson return CeedOperatorAssembleDiagonalCore_Hip(op, assembled, request, true); 1239*0d0321e0SJeremy L Thompson } 1240*0d0321e0SJeremy L Thompson } 1241*0d0321e0SJeremy L Thompson 1242*0d0321e0SJeremy L Thompson 1243*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1244*0d0321e0SJeremy L Thompson // Create operator 1245*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1246*0d0321e0SJeremy L Thompson int CeedOperatorCreate_Hip(CeedOperator op) { 1247*0d0321e0SJeremy L Thompson int ierr; 1248*0d0321e0SJeremy L Thompson Ceed ceed; 1249*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 1250*0d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 1251*0d0321e0SJeremy L Thompson 1252*0d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 1253*0d0321e0SJeremy L Thompson ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 1254*0d0321e0SJeremy L Thompson 1255*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 1256*0d0321e0SJeremy L Thompson CeedOperatorLinearAssembleQFunction_Hip); 1257*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1258*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Operator", op, 1259*0d0321e0SJeremy L Thompson "LinearAssembleQFunctionUpdate", 1260*0d0321e0SJeremy L Thompson CeedOperatorLinearAssembleQFunctionUpdate_Hip); 1261*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1262*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal", 1263*0d0321e0SJeremy L Thompson CeedOperatorLinearAssembleAddDiagonal_Hip); 1264*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1265*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Operator", op, 1266*0d0321e0SJeremy L Thompson "LinearAssembleAddPointBlockDiagonal", 1267*0d0321e0SJeremy L Thompson CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip); 1268*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1269*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 1270*0d0321e0SJeremy L Thompson CeedOperatorApplyAdd_Hip); CeedChkBackend(ierr); 1271*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 1272*0d0321e0SJeremy L Thompson CeedOperatorDestroy_Hip); CeedChkBackend(ierr); 1273*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1274*0d0321e0SJeremy L Thompson } 1275*0d0321e0SJeremy L Thompson 1276*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1277*0d0321e0SJeremy L Thompson // Composite Operator Create 1278*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1279*0d0321e0SJeremy L Thompson int CeedCompositeOperatorCreate_Hip(CeedOperator op) { 1280*0d0321e0SJeremy L Thompson int ierr; 1281*0d0321e0SJeremy L Thompson Ceed ceed; 1282*0d0321e0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 1283*0d0321e0SJeremy L Thompson 1284*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal", 1285*0d0321e0SJeremy L Thompson CeedOperatorLinearAssembleAddDiagonal_Hip); 1286*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1287*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Operator", op, 1288*0d0321e0SJeremy L Thompson "LinearAssembleAddPointBlockDiagonal", 1289*0d0321e0SJeremy L Thompson CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip); 1290*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1291*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1292*0d0321e0SJeremy L Thompson } 1293*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1294