13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 8*2b730f8bSJeremy L Thompson #include <assert.h> 90d0321e0SJeremy L Thompson #include <ceed/backend.h> 10*2b730f8bSJeremy L Thompson #include <ceed/ceed.h> 1107b31e0eSJeremy L Thompson #include <ceed/jit-tools.h> 120d0321e0SJeremy L Thompson #include <hip/hip_runtime.h> 130d0321e0SJeremy L Thompson #include <stdbool.h> 140d0321e0SJeremy L Thompson #include <string.h> 15*2b730f8bSJeremy L Thompson 160d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 17*2b730f8bSJeremy L Thompson #include "ceed-hip-ref.h" 180d0321e0SJeremy L Thompson 190d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 200d0321e0SJeremy L Thompson // Destroy operator 210d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 220d0321e0SJeremy L Thompson static int CeedOperatorDestroy_Hip(CeedOperator op) { 230d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 24*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 250d0321e0SJeremy L Thompson 260d0321e0SJeremy L Thompson // Apply data 270d0321e0SJeremy L Thompson for (CeedInt i = 0; i < impl->numein + impl->numeout; i++) { 28*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->evecs[i])); 290d0321e0SJeremy L Thompson } 30*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->evecs)); 310d0321e0SJeremy L Thompson 320d0321e0SJeremy L Thompson for (CeedInt i = 0; i < impl->numein; i++) { 33*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->qvecsin[i])); 340d0321e0SJeremy L Thompson } 35*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->qvecsin)); 360d0321e0SJeremy L Thompson 370d0321e0SJeremy L Thompson for (CeedInt i = 0; i < impl->numeout; i++) { 38*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->qvecsout[i])); 390d0321e0SJeremy L Thompson } 40*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->qvecsout)); 410d0321e0SJeremy L Thompson 420d0321e0SJeremy L Thompson // QFunction diagonal assembly data 430d0321e0SJeremy L Thompson for (CeedInt i = 0; i < impl->qfnumactivein; i++) { 44*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->qfactivein[i])); 450d0321e0SJeremy L Thompson } 46*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->qfactivein)); 470d0321e0SJeremy L Thompson 480d0321e0SJeremy L Thompson // Diag data 490d0321e0SJeremy L Thompson if (impl->diag) { 500d0321e0SJeremy L Thompson Ceed ceed; 51*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 52*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(impl->diag->module)); 53*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->diag->h_emodein)); 54*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->diag->h_emodeout)); 55*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->diag->d_emodein)); 56*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->diag->d_emodeout)); 57*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->diag->d_identity)); 58*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->diag->d_interpin)); 59*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->diag->d_interpout)); 60*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->diag->d_gradin)); 61*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->diag->d_gradout)); 62*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionDestroy(&impl->diag->pbdiagrstr)); 63*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->diag->elemdiag)); 64*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->diag->pbelemdiag)); 650d0321e0SJeremy L Thompson } 66*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->diag)); 670d0321e0SJeremy L Thompson 68a835093fSnbeams if (impl->asmb) { 69a835093fSnbeams Ceed ceed; 70*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 71*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(impl->asmb->module)); 72*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->asmb->d_B_in)); 73*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->asmb->d_B_out)); 74a835093fSnbeams } 75*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->asmb)); 76a835093fSnbeams 77*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 780d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 790d0321e0SJeremy L Thompson } 800d0321e0SJeremy L Thompson 810d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 820d0321e0SJeremy L Thompson // Setup infields or outfields 830d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 84*2b730f8bSJeremy L Thompson static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, bool isinput, CeedVector *evecs, CeedVector *qvecs, CeedInt starte, 85*2b730f8bSJeremy L Thompson CeedInt numfields, CeedInt Q, CeedInt numelements) { 86*2b730f8bSJeremy L Thompson CeedInt dim, size; 87d2643443SJeremy L Thompson CeedSize q_size; 880d0321e0SJeremy L Thompson Ceed ceed; 89*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 900d0321e0SJeremy L Thompson CeedBasis basis; 910d0321e0SJeremy L Thompson CeedElemRestriction Erestrict; 920d0321e0SJeremy L Thompson CeedOperatorField *opfields; 930d0321e0SJeremy L Thompson CeedQFunctionField *qffields; 940d0321e0SJeremy L Thompson CeedVector fieldvec; 950d0321e0SJeremy L Thompson bool strided; 960d0321e0SJeremy L Thompson bool skiprestrict; 970d0321e0SJeremy L Thompson 980d0321e0SJeremy L Thompson if (isinput) { 99*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, NULL, &opfields, NULL, NULL)); 100*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qffields, NULL, NULL)); 1010d0321e0SJeremy L Thompson } else { 102*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, NULL, NULL, NULL, &opfields)); 103*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qffields)); 1040d0321e0SJeremy L Thompson } 1050d0321e0SJeremy L Thompson 1060d0321e0SJeremy L Thompson // Loop over fields 1070d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numfields; i++) { 1080d0321e0SJeremy L Thompson CeedEvalMode emode; 109*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qffields[i], &emode)); 1100d0321e0SJeremy L Thompson 1110d0321e0SJeremy L Thompson strided = false; 1120d0321e0SJeremy L Thompson skiprestrict = false; 1130d0321e0SJeremy L Thompson if (emode != CEED_EVAL_WEIGHT) { 114*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(opfields[i], &Erestrict)); 1150d0321e0SJeremy L Thompson 1160d0321e0SJeremy L Thompson // Check whether this field can skip the element restriction: 1170d0321e0SJeremy L Thompson // must be passive input, with emode NONE, and have a strided restriction with 1180d0321e0SJeremy L Thompson // CEED_STRIDES_BACKEND. 1190d0321e0SJeremy L Thompson 1200d0321e0SJeremy L Thompson // First, check whether the field is input or output: 1210d0321e0SJeremy L Thompson if (isinput) { 1220d0321e0SJeremy L Thompson // Check for passive input: 123*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opfields[i], &fieldvec)); 1240d0321e0SJeremy L Thompson if (fieldvec != CEED_VECTOR_ACTIVE) { 1250d0321e0SJeremy L Thompson // Check emode 1260d0321e0SJeremy L Thompson if (emode == CEED_EVAL_NONE) { 1270d0321e0SJeremy L Thompson // Check for strided restriction 128*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionIsStrided(Erestrict, &strided)); 1290d0321e0SJeremy L Thompson if (strided) { 1300d0321e0SJeremy L Thompson // Check if vector is already in preferred backend ordering 131*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(Erestrict, &skiprestrict)); 1320d0321e0SJeremy L Thompson } 1330d0321e0SJeremy L Thompson } 1340d0321e0SJeremy L Thompson } 1350d0321e0SJeremy L Thompson } 1360d0321e0SJeremy L Thompson if (skiprestrict) { 1370d0321e0SJeremy L Thompson // We do not need an E-Vector, but will use the input field vector's data 1380d0321e0SJeremy L Thompson // directly in the operator application. 1390d0321e0SJeremy L Thompson evecs[i + starte] = NULL; 1400d0321e0SJeremy L Thompson } else { 141*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionCreateVector(Erestrict, NULL, &evecs[i + starte])); 1420d0321e0SJeremy L Thompson } 1430d0321e0SJeremy L Thompson } 1440d0321e0SJeremy L Thompson 1450d0321e0SJeremy L Thompson switch (emode) { 1460d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 147*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qffields[i], &size)); 148d2643443SJeremy L Thompson q_size = (CeedSize)numelements * Q * size; 149*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceed, q_size, &qvecs[i])); 1500d0321e0SJeremy L Thompson break; 1510d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 152*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qffields[i], &size)); 153d2643443SJeremy L Thompson q_size = (CeedSize)numelements * Q * size; 154*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceed, q_size, &qvecs[i])); 1550d0321e0SJeremy L Thompson break; 1560d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 157*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(opfields[i], &basis)); 158*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qffields[i], &size)); 159*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 160d2643443SJeremy L Thompson q_size = (CeedSize)numelements * Q * size; 161*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceed, q_size, &qvecs[i])); 1620d0321e0SJeremy L Thompson break; 1630d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: // Only on input fields 164*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(opfields[i], &basis)); 165d2643443SJeremy L Thompson q_size = (CeedSize)numelements * Q; 166*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceed, q_size, &qvecs[i])); 167*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisApply(basis, numelements, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, NULL, qvecs[i])); 1680d0321e0SJeremy L Thompson break; 1690d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 1700d0321e0SJeremy L Thompson break; // TODO: Not implemented 1710d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 1720d0321e0SJeremy L Thompson break; // TODO: Not implemented 1730d0321e0SJeremy L Thompson } 1740d0321e0SJeremy L Thompson } 1750d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1760d0321e0SJeremy L Thompson } 1770d0321e0SJeremy L Thompson 1780d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1790d0321e0SJeremy L Thompson // CeedOperator needs to connect all the named fields (be they active or passive) 1800d0321e0SJeremy L Thompson // to the named inputs and outputs of its CeedQFunction. 1810d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1820d0321e0SJeremy L Thompson static int CeedOperatorSetup_Hip(CeedOperator op) { 1830d0321e0SJeremy L Thompson bool setupdone; 184*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorIsSetupDone(op, &setupdone)); 185*2b730f8bSJeremy L Thompson if (setupdone) return CEED_ERROR_SUCCESS; 1860d0321e0SJeremy L Thompson Ceed ceed; 187*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 1880d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 189*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 1900d0321e0SJeremy L Thompson CeedQFunction qf; 191*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 1920d0321e0SJeremy L Thompson CeedInt Q, numelements, numinputfields, numoutputfields; 193*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q)); 194*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetNumElements(op, &numelements)); 1950d0321e0SJeremy L Thompson CeedOperatorField *opinputfields, *opoutputfields; 196*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, &numinputfields, &opinputfields, &numoutputfields, &opoutputfields)); 1970d0321e0SJeremy L Thompson CeedQFunctionField *qfinputfields, *qfoutputfields; 198*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qfinputfields, NULL, &qfoutputfields)); 1990d0321e0SJeremy L Thompson 2000d0321e0SJeremy L Thompson // Allocate 201*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(numinputfields + numoutputfields, &impl->evecs)); 2020d0321e0SJeremy L Thompson 203*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->qvecsin)); 204*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->qvecsout)); 2050d0321e0SJeremy L Thompson 206*2b730f8bSJeremy L Thompson impl->numein = numinputfields; 207*2b730f8bSJeremy L Thompson impl->numeout = numoutputfields; 2080d0321e0SJeremy L Thompson 2090d0321e0SJeremy L Thompson // Set up infield and outfield evecs and qvecs 2100d0321e0SJeremy L Thompson // Infields 211*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetupFields_Hip(qf, op, true, impl->evecs, impl->qvecsin, 0, numinputfields, Q, numelements)); 2120d0321e0SJeremy L Thompson 2130d0321e0SJeremy L Thompson // Outfields 214*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetupFields_Hip(qf, op, false, impl->evecs, impl->qvecsout, numinputfields, numoutputfields, Q, numelements)); 2150d0321e0SJeremy L Thompson 216*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetSetupDone(op)); 2170d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2180d0321e0SJeremy L Thompson } 2190d0321e0SJeremy L Thompson 2200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2210d0321e0SJeremy L Thompson // Setup Operator Inputs 2220d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 223*2b730f8bSJeremy L Thompson static inline int CeedOperatorSetupInputs_Hip(CeedInt numinputfields, CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 224*2b730f8bSJeremy L Thompson CeedVector invec, const bool skipactive, CeedScalar *edata[2 * CEED_FIELD_MAX], CeedOperator_Hip *impl, 225*2b730f8bSJeremy L Thompson CeedRequest *request) { 2260d0321e0SJeremy L Thompson CeedEvalMode emode; 2270d0321e0SJeremy L Thompson CeedVector vec; 2280d0321e0SJeremy L Thompson CeedElemRestriction Erestrict; 2290d0321e0SJeremy L Thompson 2300d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numinputfields; i++) { 2310d0321e0SJeremy L Thompson // Get input vector 232*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opinputfields[i], &vec)); 2330d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 234*2b730f8bSJeremy L Thompson if (skipactive) continue; 235*2b730f8bSJeremy L Thompson else vec = invec; 2360d0321e0SJeremy L Thompson } 2370d0321e0SJeremy L Thompson 238*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode)); 2390d0321e0SJeremy L Thompson if (emode == CEED_EVAL_WEIGHT) { // Skip 2400d0321e0SJeremy L Thompson } else { 2410d0321e0SJeremy L Thompson // Get input vector 242*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opinputfields[i], &vec)); 2430d0321e0SJeremy L Thompson // Get input element restriction 244*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict)); 245*2b730f8bSJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) vec = invec; 2460d0321e0SJeremy L Thompson // Restrict, if necessary 2470d0321e0SJeremy L Thompson if (!impl->evecs[i]) { 2480d0321e0SJeremy L Thompson // No restriction for this field; read data directly from vec. 249*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, (const CeedScalar **)&edata[i])); 2500d0321e0SJeremy L Thompson } else { 251*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApply(Erestrict, CEED_NOTRANSPOSE, vec, impl->evecs[i], request)); 2520d0321e0SJeremy L Thompson // Get evec 253*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_DEVICE, (const CeedScalar **)&edata[i])); 2540d0321e0SJeremy L Thompson } 2550d0321e0SJeremy L Thompson } 2560d0321e0SJeremy L Thompson } 2570d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2580d0321e0SJeremy L Thompson } 2590d0321e0SJeremy L Thompson 2600d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2610d0321e0SJeremy L Thompson // Input Basis Action 2620d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 263*2b730f8bSJeremy L Thompson static inline int CeedOperatorInputBasis_Hip(CeedInt numelements, CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 264*2b730f8bSJeremy L Thompson CeedInt numinputfields, const bool skipactive, CeedScalar *edata[2 * CEED_FIELD_MAX], 265*2b730f8bSJeremy L Thompson CeedOperator_Hip *impl) { 2660d0321e0SJeremy L Thompson CeedInt elemsize, size; 2670d0321e0SJeremy L Thompson CeedElemRestriction Erestrict; 2680d0321e0SJeremy L Thompson CeedEvalMode emode; 2690d0321e0SJeremy L Thompson CeedBasis basis; 2700d0321e0SJeremy L Thompson 2710d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numinputfields; i++) { 2720d0321e0SJeremy L Thompson // Skip active input 2730d0321e0SJeremy L Thompson if (skipactive) { 2740d0321e0SJeremy L Thompson CeedVector vec; 275*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opinputfields[i], &vec)); 276*2b730f8bSJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) continue; 2770d0321e0SJeremy L Thompson } 2780d0321e0SJeremy L Thompson // Get elemsize, emode, size 279*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict)); 280*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(Erestrict, &elemsize)); 281*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode)); 282*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qfinputfields[i], &size)); 2830d0321e0SJeremy L Thompson // Basis action 2840d0321e0SJeremy L Thompson switch (emode) { 2850d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 286*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_DEVICE, CEED_USE_POINTER, edata[i])); 2870d0321e0SJeremy L Thompson break; 2880d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 289*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(opinputfields[i], &basis)); 290*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisApply(basis, numelements, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, impl->evecs[i], impl->qvecsin[i])); 2910d0321e0SJeremy L Thompson break; 2920d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 293*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(opinputfields[i], &basis)); 294*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisApply(basis, numelements, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, impl->evecs[i], impl->qvecsin[i])); 2950d0321e0SJeremy L Thompson break; 2960d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: 2970d0321e0SJeremy L Thompson break; // No action 2980d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 2990d0321e0SJeremy L Thompson break; // TODO: Not implemented 3000d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 3010d0321e0SJeremy L Thompson break; // TODO: Not implemented 3020d0321e0SJeremy L Thompson } 3030d0321e0SJeremy L Thompson } 3040d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3050d0321e0SJeremy L Thompson } 3060d0321e0SJeremy L Thompson 3070d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3080d0321e0SJeremy L Thompson // Restore Input Vectors 3090d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 310*2b730f8bSJeremy L Thompson static inline int CeedOperatorRestoreInputs_Hip(CeedInt numinputfields, CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 311*2b730f8bSJeremy L Thompson const bool skipactive, CeedScalar *edata[2 * CEED_FIELD_MAX], CeedOperator_Hip *impl) { 3120d0321e0SJeremy L Thompson CeedEvalMode emode; 3130d0321e0SJeremy L Thompson CeedVector vec; 3140d0321e0SJeremy L Thompson 3150d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numinputfields; i++) { 3160d0321e0SJeremy L Thompson // Skip active input 3170d0321e0SJeremy L Thompson if (skipactive) { 318*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opinputfields[i], &vec)); 319*2b730f8bSJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) continue; 3200d0321e0SJeremy L Thompson } 321*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode)); 3220d0321e0SJeremy L Thompson if (emode == CEED_EVAL_WEIGHT) { // Skip 3230d0321e0SJeremy L Thompson } else { 3240d0321e0SJeremy L Thompson if (!impl->evecs[i]) { // This was a skiprestrict case 325*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opinputfields[i], &vec)); 326*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(vec, (const CeedScalar **)&edata[i])); 3270d0321e0SJeremy L Thompson } else { 328*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(impl->evecs[i], (const CeedScalar **)&edata[i])); 3290d0321e0SJeremy L Thompson } 3300d0321e0SJeremy L Thompson } 3310d0321e0SJeremy L Thompson } 3320d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3330d0321e0SJeremy L Thompson } 3340d0321e0SJeremy L Thompson 3350d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3360d0321e0SJeremy L Thompson // Apply and add to output 3370d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 338*2b730f8bSJeremy L Thompson static int CeedOperatorApplyAdd_Hip(CeedOperator op, CeedVector invec, CeedVector outvec, CeedRequest *request) { 3390d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 340*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 3410d0321e0SJeremy L Thompson CeedQFunction qf; 342*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 3430d0321e0SJeremy L Thompson CeedInt Q, numelements, elemsize, numinputfields, numoutputfields, size; 344*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q)); 345*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetNumElements(op, &numelements)); 3460d0321e0SJeremy L Thompson CeedOperatorField *opinputfields, *opoutputfields; 347*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, &numinputfields, &opinputfields, &numoutputfields, &opoutputfields)); 3480d0321e0SJeremy L Thompson CeedQFunctionField *qfinputfields, *qfoutputfields; 349*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qfinputfields, NULL, &qfoutputfields)); 3500d0321e0SJeremy L Thompson CeedEvalMode emode; 3510d0321e0SJeremy L Thompson CeedVector vec; 3520d0321e0SJeremy L Thompson CeedBasis basis; 3530d0321e0SJeremy L Thompson CeedElemRestriction Erestrict; 3540d0321e0SJeremy L Thompson CeedScalar *edata[2 * CEED_FIELD_MAX]; 3550d0321e0SJeremy L Thompson 3560d0321e0SJeremy L Thompson // Setup 357*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetup_Hip(op)); 3580d0321e0SJeremy L Thompson 3590d0321e0SJeremy L Thompson // Input Evecs and Restriction 360*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetupInputs_Hip(numinputfields, qfinputfields, opinputfields, invec, false, edata, impl, request)); 3610d0321e0SJeremy L Thompson 3620d0321e0SJeremy L Thompson // Input basis apply if needed 363*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorInputBasis_Hip(numelements, qfinputfields, opinputfields, numinputfields, false, edata, impl)); 3640d0321e0SJeremy L Thompson 3650d0321e0SJeremy L Thompson // Output pointers, as necessary 3660d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numoutputfields; i++) { 367*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode)); 3680d0321e0SJeremy L Thompson if (emode == CEED_EVAL_NONE) { 3690d0321e0SJeremy L Thompson // Set the output Q-Vector to use the E-Vector data directly. 370*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(impl->evecs[i + impl->numein], CEED_MEM_DEVICE, &edata[i + numinputfields])); 371*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_DEVICE, CEED_USE_POINTER, edata[i + numinputfields])); 3720d0321e0SJeremy L Thompson } 3730d0321e0SJeremy L Thompson } 3740d0321e0SJeremy L Thompson 3750d0321e0SJeremy L Thompson // Q function 376*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionApply(qf, numelements * Q, impl->qvecsin, impl->qvecsout)); 3770d0321e0SJeremy L Thompson 3780d0321e0SJeremy L Thompson // Output basis apply if needed 3790d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numoutputfields; i++) { 3800d0321e0SJeremy L Thompson // Get elemsize, emode, size 381*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict)); 382*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(Erestrict, &elemsize)); 383*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode)); 384*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qfoutputfields[i], &size)); 3850d0321e0SJeremy L Thompson // Basis action 3860d0321e0SJeremy L Thompson switch (emode) { 3870d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 3880d0321e0SJeremy L Thompson break; 3890d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 390*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(opoutputfields[i], &basis)); 391*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisApply(basis, numelements, CEED_TRANSPOSE, CEED_EVAL_INTERP, impl->qvecsout[i], impl->evecs[i + impl->numein])); 3920d0321e0SJeremy L Thompson break; 3930d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 394*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(opoutputfields[i], &basis)); 395*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisApply(basis, numelements, CEED_TRANSPOSE, CEED_EVAL_GRAD, impl->qvecsout[i], impl->evecs[i + impl->numein])); 3960d0321e0SJeremy L Thompson break; 3970d0321e0SJeremy L Thompson // LCOV_EXCL_START 3980d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 3990d0321e0SJeremy L Thompson Ceed ceed; 400*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 401*2b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); 4020d0321e0SJeremy L Thompson break; // Should not occur 4030d0321e0SJeremy L Thompson } 4040d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 4050d0321e0SJeremy L Thompson break; // TODO: Not implemented 4060d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 4070d0321e0SJeremy L Thompson break; // TODO: Not implemented 4080d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 4090d0321e0SJeremy L Thompson } 4100d0321e0SJeremy L Thompson } 4110d0321e0SJeremy L Thompson 4120d0321e0SJeremy L Thompson // Output restriction 4130d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numoutputfields; i++) { 4140d0321e0SJeremy L Thompson // Restore evec 415*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode)); 4160d0321e0SJeremy L Thompson if (emode == CEED_EVAL_NONE) { 417*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(impl->evecs[i + impl->numein], &edata[i + numinputfields])); 4180d0321e0SJeremy L Thompson } 4190d0321e0SJeremy L Thompson // Get output vector 420*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opoutputfields[i], &vec)); 4210d0321e0SJeremy L Thompson // Restrict 422*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict)); 4230d0321e0SJeremy L Thompson // Active 424*2b730f8bSJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) vec = outvec; 4250d0321e0SJeremy L Thompson 426*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApply(Erestrict, CEED_TRANSPOSE, impl->evecs[i + impl->numein], vec, request)); 4270d0321e0SJeremy L Thompson } 4280d0321e0SJeremy L Thompson 4290d0321e0SJeremy L Thompson // Restore input arrays 430*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorRestoreInputs_Hip(numinputfields, qfinputfields, opinputfields, false, edata, impl)); 4310d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4320d0321e0SJeremy L Thompson } 4330d0321e0SJeremy L Thompson 4340d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4350d0321e0SJeremy L Thompson // Core code for assembling linear QFunction 4360d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 437*2b730f8bSJeremy L Thompson static inline int CeedOperatorLinearAssembleQFunctionCore_Hip(CeedOperator op, bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr, 4380d0321e0SJeremy L Thompson CeedRequest *request) { 4390d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 440*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 4410d0321e0SJeremy L Thompson CeedQFunction qf; 442*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 4430d0321e0SJeremy L Thompson CeedInt Q, numelements, numinputfields, numoutputfields, size; 444d2643443SJeremy L Thompson CeedSize q_size; 445*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q)); 446*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetNumElements(op, &numelements)); 4470d0321e0SJeremy L Thompson CeedOperatorField *opinputfields, *opoutputfields; 448*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, &numinputfields, &opinputfields, &numoutputfields, &opoutputfields)); 4490d0321e0SJeremy L Thompson CeedQFunctionField *qfinputfields, *qfoutputfields; 450*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qfinputfields, NULL, &qfoutputfields)); 4510d0321e0SJeremy L Thompson CeedVector vec; 4520d0321e0SJeremy L Thompson CeedInt numactivein = impl->qfnumactivein, numactiveout = impl->qfnumactiveout; 4530d0321e0SJeremy L Thompson CeedVector *activein = impl->qfactivein; 4540d0321e0SJeremy L Thompson CeedScalar *a, *tmp; 4550d0321e0SJeremy L Thompson Ceed ceed, ceedparent; 456*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 457*2b730f8bSJeremy L Thompson CeedCallBackend(CeedGetOperatorFallbackParentCeed(ceed, &ceedparent)); 4580d0321e0SJeremy L Thompson ceedparent = ceedparent ? ceedparent : ceed; 4590d0321e0SJeremy L Thompson CeedScalar *edata[2 * CEED_FIELD_MAX]; 4600d0321e0SJeremy L Thompson 4610d0321e0SJeremy L Thompson // Setup 462*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetup_Hip(op)); 4630d0321e0SJeremy L Thompson 4640d0321e0SJeremy L Thompson // Check for identity 4650d0321e0SJeremy L Thompson bool identityqf; 466*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionIsIdentity(qf, &identityqf)); 467*2b730f8bSJeremy L Thompson if (identityqf) { 4680d0321e0SJeremy L Thompson // LCOV_EXCL_START 469*2b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Assembling identity QFunctions not supported"); 4700d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 471*2b730f8bSJeremy L Thompson } 4720d0321e0SJeremy L Thompson 4730d0321e0SJeremy L Thompson // Input Evecs and Restriction 474*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetupInputs_Hip(numinputfields, qfinputfields, opinputfields, NULL, true, edata, impl, request)); 4750d0321e0SJeremy L Thompson 4760d0321e0SJeremy L Thompson // Count number of active input fields 4770d0321e0SJeremy L Thompson if (!numactivein) { 4780d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numinputfields; i++) { 4790d0321e0SJeremy L Thompson // Get input vector 480*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opinputfields[i], &vec)); 4810d0321e0SJeremy L Thompson // Check if active input 4820d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 483*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qfinputfields[i], &size)); 484*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetValue(impl->qvecsin[i], 0.0)); 485*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_DEVICE, &tmp)); 486*2b730f8bSJeremy L Thompson CeedCallBackend(CeedRealloc(numactivein + size, &activein)); 4870d0321e0SJeremy L Thompson for (CeedInt field = 0; field < size; field++) { 488d2643443SJeremy L Thompson q_size = (CeedSize)Q * numelements; 489*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceed, q_size, &activein[numactivein + field])); 490*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetArray(activein[numactivein + field], CEED_MEM_DEVICE, CEED_USE_POINTER, &tmp[field * Q * numelements])); 4910d0321e0SJeremy L Thompson } 4920d0321e0SJeremy L Thompson numactivein += size; 493*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(impl->qvecsin[i], &tmp)); 4940d0321e0SJeremy L Thompson } 4950d0321e0SJeremy L Thompson } 4960d0321e0SJeremy L Thompson impl->qfnumactivein = numactivein; 4970d0321e0SJeremy L Thompson impl->qfactivein = activein; 4980d0321e0SJeremy L Thompson } 4990d0321e0SJeremy L Thompson 5000d0321e0SJeremy L Thompson // Count number of active output fields 5010d0321e0SJeremy L Thompson if (!numactiveout) { 5020d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numoutputfields; i++) { 5030d0321e0SJeremy L Thompson // Get output vector 504*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opoutputfields[i], &vec)); 5050d0321e0SJeremy L Thompson // Check if active output 5060d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 507*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qfoutputfields[i], &size)); 5080d0321e0SJeremy L Thompson numactiveout += size; 5090d0321e0SJeremy L Thompson } 5100d0321e0SJeremy L Thompson } 5110d0321e0SJeremy L Thompson impl->qfnumactiveout = numactiveout; 5120d0321e0SJeremy L Thompson } 5130d0321e0SJeremy L Thompson 5140d0321e0SJeremy L Thompson // Check sizes 515*2b730f8bSJeremy L Thompson if (!numactivein || !numactiveout) { 5160d0321e0SJeremy L Thompson // LCOV_EXCL_START 517*2b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); 5180d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 519*2b730f8bSJeremy L Thompson } 5200d0321e0SJeremy L Thompson 5210d0321e0SJeremy L Thompson // Build objects if needed 5220d0321e0SJeremy L Thompson if (build_objects) { 5230d0321e0SJeremy L Thompson // Create output restriction 5240d0321e0SJeremy L Thompson CeedInt strides[3] = {1, numelements * Q, Q}; /* *NOPAD* */ 525*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionCreateStrided(ceedparent, numelements, Q, numactivein * numactiveout, 526*2b730f8bSJeremy L Thompson numactivein * numactiveout * numelements * Q, strides, rstr)); 5270d0321e0SJeremy L Thompson // Create assembled vector 528d2643443SJeremy L Thompson CeedSize l_size = (CeedSize)numelements * Q * numactivein * numactiveout; 529*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceedparent, l_size, assembled)); 5300d0321e0SJeremy L Thompson } 531*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetValue(*assembled, 0.0)); 532*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(*assembled, CEED_MEM_DEVICE, &a)); 5330d0321e0SJeremy L Thompson 5340d0321e0SJeremy L Thompson // Input basis apply 535*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorInputBasis_Hip(numelements, qfinputfields, opinputfields, numinputfields, true, edata, impl)); 5360d0321e0SJeremy L Thompson 5370d0321e0SJeremy L Thompson // Assemble QFunction 5380d0321e0SJeremy L Thompson for (CeedInt in = 0; in < numactivein; in++) { 5390d0321e0SJeremy L Thompson // Set Inputs 540*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetValue(activein[in], 1.0)); 5410d0321e0SJeremy L Thompson if (numactivein > 1) { 542*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetValue(activein[(in + numactivein - 1) % numactivein], 0.0)); 5430d0321e0SJeremy L Thompson } 5440d0321e0SJeremy L Thompson // Set Outputs 5450d0321e0SJeremy L Thompson for (CeedInt out = 0; out < numoutputfields; out++) { 5460d0321e0SJeremy L Thompson // Get output vector 547*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opoutputfields[out], &vec)); 5480d0321e0SJeremy L Thompson // Check if active output 5490d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 550*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_DEVICE, CEED_USE_POINTER, a)); 551*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qfoutputfields[out], &size)); 5520d0321e0SJeremy L Thompson a += size * Q * numelements; // Advance the pointer by the size of the output 5530d0321e0SJeremy L Thompson } 5540d0321e0SJeremy L Thompson } 5550d0321e0SJeremy L Thompson // Apply QFunction 556*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionApply(qf, Q * numelements, impl->qvecsin, impl->qvecsout)); 5570d0321e0SJeremy L Thompson } 5580d0321e0SJeremy L Thompson 5590d0321e0SJeremy L Thompson // Un-set output Qvecs to prevent accidental overwrite of Assembled 5600d0321e0SJeremy L Thompson for (CeedInt out = 0; out < numoutputfields; out++) { 5610d0321e0SJeremy L Thompson // Get output vector 562*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opoutputfields[out], &vec)); 5630d0321e0SJeremy L Thompson // Check if active output 5640d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 565*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorTakeArray(impl->qvecsout[out], CEED_MEM_DEVICE, NULL)); 5660d0321e0SJeremy L Thompson } 5670d0321e0SJeremy L Thompson } 5680d0321e0SJeremy L Thompson 5690d0321e0SJeremy L Thompson // Restore input arrays 570*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorRestoreInputs_Hip(numinputfields, qfinputfields, opinputfields, true, edata, impl)); 5710d0321e0SJeremy L Thompson 5720d0321e0SJeremy L Thompson // Restore output 573*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(*assembled, &a)); 5740d0321e0SJeremy L Thompson 5750d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 5760d0321e0SJeremy L Thompson } 5770d0321e0SJeremy L Thompson 5780d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 5790d0321e0SJeremy L Thompson // Assemble Linear QFunction 5800d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 581*2b730f8bSJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Hip(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 582*2b730f8bSJeremy L Thompson return CeedOperatorLinearAssembleQFunctionCore_Hip(op, true, assembled, rstr, request); 5830d0321e0SJeremy L Thompson } 5840d0321e0SJeremy L Thompson 5850d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 5860d0321e0SJeremy L Thompson // Assemble Linear QFunction 5870d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 588*2b730f8bSJeremy L Thompson static int CeedOperatorLinearAssembleQFunctionUpdate_Hip(CeedOperator op, CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 589*2b730f8bSJeremy L Thompson return CeedOperatorLinearAssembleQFunctionCore_Hip(op, false, &assembled, &rstr, request); 5900d0321e0SJeremy L Thompson } 5910d0321e0SJeremy L Thompson 5920d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 5930d0321e0SJeremy L Thompson // Create point block restriction 5940d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 595*2b730f8bSJeremy L Thompson static int CreatePBRestriction(CeedElemRestriction rstr, CeedElemRestriction *pbRstr) { 5960d0321e0SJeremy L Thompson Ceed ceed; 597*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 5980d0321e0SJeremy L Thompson const CeedInt *offsets; 599*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 6000d0321e0SJeremy L Thompson 6010d0321e0SJeremy L Thompson // Expand offsets 6027b63f5c6SJed Brown CeedInt nelem, ncomp, elemsize, compstride, *pbOffsets; 6037b63f5c6SJed Brown CeedSize l_size; 604*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &nelem)); 605*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &ncomp)); 606*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elemsize)); 607*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &compstride)); 608*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 6090d0321e0SJeremy L Thompson CeedInt shift = ncomp; 610*2b730f8bSJeremy L Thompson if (compstride != 1) shift *= ncomp; 611*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(nelem * elemsize, &pbOffsets)); 6120d0321e0SJeremy L Thompson for (CeedInt i = 0; i < nelem * elemsize; i++) { 6130d0321e0SJeremy L Thompson pbOffsets[i] = offsets[i] * shift; 6140d0321e0SJeremy L Thompson } 6150d0321e0SJeremy L Thompson 6160d0321e0SJeremy L Thompson // Create new restriction 617*2b730f8bSJeremy L Thompson CeedCallBackend( 618*2b730f8bSJeremy L Thompson CeedElemRestrictionCreate(ceed, nelem, elemsize, ncomp * ncomp, 1, l_size * ncomp, CEED_MEM_HOST, CEED_OWN_POINTER, pbOffsets, pbRstr)); 6190d0321e0SJeremy L Thompson 6200d0321e0SJeremy L Thompson // Cleanup 621*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 6220d0321e0SJeremy L Thompson 6230d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 6240d0321e0SJeremy L Thompson } 6250d0321e0SJeremy L Thompson 6260d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 6270d0321e0SJeremy L Thompson // Assemble diagonal setup 6280d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 629*2b730f8bSJeremy L Thompson static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op, const bool pointBlock) { 6300d0321e0SJeremy L Thompson Ceed ceed; 631*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 6320d0321e0SJeremy L Thompson CeedQFunction qf; 633*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 6340d0321e0SJeremy L Thompson CeedInt numinputfields, numoutputfields; 635*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields)); 6360d0321e0SJeremy L Thompson 6370d0321e0SJeremy L Thompson // Determine active input basis 6380d0321e0SJeremy L Thompson CeedOperatorField *opfields; 6390d0321e0SJeremy L Thompson CeedQFunctionField *qffields; 640*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, NULL, &opfields, NULL, NULL)); 641*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qffields, NULL, NULL)); 6420d0321e0SJeremy L Thompson CeedInt numemodein = 0, ncomp = 0, dim = 1; 6430d0321e0SJeremy L Thompson CeedEvalMode *emodein = NULL; 6440d0321e0SJeremy L Thompson CeedBasis basisin = NULL; 6450d0321e0SJeremy L Thompson CeedElemRestriction rstrin = NULL; 6460d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numinputfields; i++) { 6470d0321e0SJeremy L Thompson CeedVector vec; 648*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opfields[i], &vec)); 6490d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 6500d0321e0SJeremy L Thompson CeedElemRestriction rstr; 651*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(opfields[i], &basisin)); 652*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basisin, &ncomp)); 653*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basisin, &dim)); 654*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(opfields[i], &rstr)); 655*2b730f8bSJeremy L Thompson if (rstrin && rstrin != rstr) { 6560d0321e0SJeremy L Thompson // LCOV_EXCL_START 657*2b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Backend does not implement multi-field non-composite operator diagonal assembly"); 6580d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 659*2b730f8bSJeremy L Thompson } 6600d0321e0SJeremy L Thompson rstrin = rstr; 6610d0321e0SJeremy L Thompson CeedEvalMode emode; 662*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qffields[i], &emode)); 6630d0321e0SJeremy L Thompson switch (emode) { 6640d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 6650d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 666*2b730f8bSJeremy L Thompson CeedCallBackend(CeedRealloc(numemodein + 1, &emodein)); 6670d0321e0SJeremy L Thompson emodein[numemodein] = emode; 6680d0321e0SJeremy L Thompson numemodein += 1; 6690d0321e0SJeremy L Thompson break; 6700d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 671*2b730f8bSJeremy L Thompson CeedCallBackend(CeedRealloc(numemodein + dim, &emodein)); 672*2b730f8bSJeremy L Thompson for (CeedInt d = 0; d < dim; d++) emodein[numemodein + d] = emode; 6730d0321e0SJeremy L Thompson numemodein += dim; 6740d0321e0SJeremy L Thompson break; 6750d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: 6760d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 6770d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 6780d0321e0SJeremy L Thompson break; // Caught by QF Assembly 6790d0321e0SJeremy L Thompson } 6800d0321e0SJeremy L Thompson } 6810d0321e0SJeremy L Thompson } 6820d0321e0SJeremy L Thompson 6830d0321e0SJeremy L Thompson // Determine active output basis 684*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, NULL, NULL, NULL, &opfields)); 685*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qffields)); 6860d0321e0SJeremy L Thompson CeedInt numemodeout = 0; 6870d0321e0SJeremy L Thompson CeedEvalMode *emodeout = NULL; 6880d0321e0SJeremy L Thompson CeedBasis basisout = NULL; 6890d0321e0SJeremy L Thompson CeedElemRestriction rstrout = NULL; 6900d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numoutputfields; i++) { 6910d0321e0SJeremy L Thompson CeedVector vec; 692*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(opfields[i], &vec)); 6930d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 6940d0321e0SJeremy L Thompson CeedElemRestriction rstr; 695*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(opfields[i], &basisout)); 696*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(opfields[i], &rstr)); 697*2b730f8bSJeremy L Thompson if (rstrout && rstrout != rstr) { 6980d0321e0SJeremy L Thompson // LCOV_EXCL_START 699*2b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Backend does not implement multi-field non-composite operator diagonal assembly"); 7000d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 701*2b730f8bSJeremy L Thompson } 7020d0321e0SJeremy L Thompson rstrout = rstr; 7030d0321e0SJeremy L Thompson CeedEvalMode emode; 704*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qffields[i], &emode)); 7050d0321e0SJeremy L Thompson switch (emode) { 7060d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 7070d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 708*2b730f8bSJeremy L Thompson CeedCallBackend(CeedRealloc(numemodeout + 1, &emodeout)); 7090d0321e0SJeremy L Thompson emodeout[numemodeout] = emode; 7100d0321e0SJeremy L Thompson numemodeout += 1; 7110d0321e0SJeremy L Thompson break; 7120d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 713*2b730f8bSJeremy L Thompson CeedCallBackend(CeedRealloc(numemodeout + dim, &emodeout)); 714*2b730f8bSJeremy L Thompson for (CeedInt d = 0; d < dim; d++) emodeout[numemodeout + d] = emode; 7150d0321e0SJeremy L Thompson numemodeout += dim; 7160d0321e0SJeremy L Thompson break; 7170d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: 7180d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 7190d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 7200d0321e0SJeremy L Thompson break; // Caught by QF Assembly 7210d0321e0SJeremy L Thompson } 7220d0321e0SJeremy L Thompson } 7230d0321e0SJeremy L Thompson } 7240d0321e0SJeremy L Thompson 7250d0321e0SJeremy L Thompson // Operator data struct 7260d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 727*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 728*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl->diag)); 7290d0321e0SJeremy L Thompson CeedOperatorDiag_Hip *diag = impl->diag; 7300d0321e0SJeremy L Thompson diag->basisin = basisin; 7310d0321e0SJeremy L Thompson diag->basisout = basisout; 7320d0321e0SJeremy L Thompson diag->h_emodein = emodein; 7330d0321e0SJeremy L Thompson diag->h_emodeout = emodeout; 7340d0321e0SJeremy L Thompson diag->numemodein = numemodein; 7350d0321e0SJeremy L Thompson diag->numemodeout = numemodeout; 7360d0321e0SJeremy L Thompson 7370d0321e0SJeremy L Thompson // Assemble kernel 73807b31e0eSJeremy L Thompson 73907b31e0eSJeremy L Thompson char *diagonal_kernel_path, *diagonal_kernel_source; 740*2b730f8bSJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h", &diagonal_kernel_path)); 74107b31e0eSJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Diagonal Assembly Kernel Source -----\n"); 742*2b730f8bSJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, diagonal_kernel_path, &diagonal_kernel_source)); 743*2b730f8bSJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Diagonal Assembly Source Complete! -----\n"); 7440d0321e0SJeremy L Thompson CeedInt nnodes, nqpts; 745*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes(basisin, &nnodes)); 746*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints(basisin, &nqpts)); 7470d0321e0SJeremy L Thompson diag->nnodes = nnodes; 748*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCompileHip(ceed, diagonal_kernel_source, &diag->module, 5, "NUMEMODEIN", numemodein, "NUMEMODEOUT", numemodeout, "NNODES", 749*2b730f8bSJeremy L Thompson nnodes, "NQPTS", nqpts, "NCOMP", ncomp)); 750*2b730f8bSJeremy L Thompson CeedCallBackend(CeedGetKernelHip(ceed, diag->module, "linearDiagonal", &diag->linearDiagonal)); 751*2b730f8bSJeremy L Thompson CeedCallBackend(CeedGetKernelHip(ceed, diag->module, "linearPointBlockDiagonal", &diag->linearPointBlock)); 752*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&diagonal_kernel_path)); 753*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&diagonal_kernel_source)); 7540d0321e0SJeremy L Thompson 7550d0321e0SJeremy L Thompson // Basis matrices 7560d0321e0SJeremy L Thompson const CeedInt qBytes = nqpts * sizeof(CeedScalar); 7570d0321e0SJeremy L Thompson const CeedInt iBytes = qBytes * nnodes; 7580d0321e0SJeremy L Thompson const CeedInt gBytes = qBytes * nnodes * dim; 7590d0321e0SJeremy L Thompson const CeedInt eBytes = sizeof(CeedEvalMode); 7600d0321e0SJeremy L Thompson const CeedScalar *interpin, *interpout, *gradin, *gradout; 7610d0321e0SJeremy L Thompson 7620d0321e0SJeremy L Thompson // CEED_EVAL_NONE 7630d0321e0SJeremy L Thompson CeedScalar *identity = NULL; 7640d0321e0SJeremy L Thompson bool evalNone = false; 765*2b730f8bSJeremy L Thompson for (CeedInt i = 0; i < numemodein; i++) evalNone = evalNone || (emodein[i] == CEED_EVAL_NONE); 766*2b730f8bSJeremy L Thompson for (CeedInt i = 0; i < numemodeout; i++) evalNone = evalNone || (emodeout[i] == CEED_EVAL_NONE); 7670d0321e0SJeremy L Thompson if (evalNone) { 768*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(nqpts * nnodes, &identity)); 769*2b730f8bSJeremy L Thompson for (CeedInt i = 0; i < (nnodes < nqpts ? nnodes : nqpts); i++) identity[i * nnodes + i] = 1.0; 770*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&diag->d_identity, iBytes)); 771*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(diag->d_identity, identity, iBytes, hipMemcpyHostToDevice)); 7720d0321e0SJeremy L Thompson } 7730d0321e0SJeremy L Thompson 7740d0321e0SJeremy L Thompson // CEED_EVAL_INTERP 775*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetInterp(basisin, &interpin)); 776*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&diag->d_interpin, iBytes)); 777*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(diag->d_interpin, interpin, iBytes, hipMemcpyHostToDevice)); 778*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetInterp(basisout, &interpout)); 779*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&diag->d_interpout, iBytes)); 780*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(diag->d_interpout, interpout, iBytes, hipMemcpyHostToDevice)); 7810d0321e0SJeremy L Thompson 7820d0321e0SJeremy L Thompson // CEED_EVAL_GRAD 783*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetGrad(basisin, &gradin)); 784*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&diag->d_gradin, gBytes)); 785*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(diag->d_gradin, gradin, gBytes, hipMemcpyHostToDevice)); 786*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetGrad(basisout, &gradout)); 787*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&diag->d_gradout, gBytes)); 788*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(diag->d_gradout, gradout, gBytes, hipMemcpyHostToDevice)); 7890d0321e0SJeremy L Thompson 7900d0321e0SJeremy L Thompson // Arrays of emodes 791*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&diag->d_emodein, numemodein * eBytes)); 792*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(diag->d_emodein, emodein, numemodein * eBytes, hipMemcpyHostToDevice)); 793*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&diag->d_emodeout, numemodeout * eBytes)); 794*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(diag->d_emodeout, emodeout, numemodeout * eBytes, hipMemcpyHostToDevice)); 7950d0321e0SJeremy L Thompson 7960d0321e0SJeremy L Thompson // Restriction 7970d0321e0SJeremy L Thompson diag->diagrstr = rstrout; 7980d0321e0SJeremy L Thompson 7990d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 8000d0321e0SJeremy L Thompson } 8010d0321e0SJeremy L Thompson 8020d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8030d0321e0SJeremy L Thompson // Assemble diagonal common code 8040d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 805*2b730f8bSJeremy L Thompson static inline int CeedOperatorAssembleDiagonalCore_Hip(CeedOperator op, CeedVector assembled, CeedRequest *request, const bool pointBlock) { 8060d0321e0SJeremy L Thompson Ceed ceed; 807*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 8080d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 809*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 8100d0321e0SJeremy L Thompson 8110d0321e0SJeremy L Thompson // Assemble QFunction 8120d0321e0SJeremy L Thompson CeedVector assembledqf; 8130d0321e0SJeremy L Thompson CeedElemRestriction rstr; 814*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembledqf, &rstr, request)); 815*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); 8160d0321e0SJeremy L Thompson 8170d0321e0SJeremy L Thompson // Setup 818*2b730f8bSJeremy L Thompson if (!impl->diag) CeedCallBackend(CeedOperatorAssembleDiagonalSetup_Hip(op, pointBlock)); 8190d0321e0SJeremy L Thompson CeedOperatorDiag_Hip *diag = impl->diag; 8200d0321e0SJeremy L Thompson assert(diag != NULL); 8210d0321e0SJeremy L Thompson 8220d0321e0SJeremy L Thompson // Restriction 8230d0321e0SJeremy L Thompson if (pointBlock && !diag->pbdiagrstr) { 8240d0321e0SJeremy L Thompson CeedElemRestriction pbdiagrstr; 825*2b730f8bSJeremy L Thompson CeedCallBackend(CreatePBRestriction(diag->diagrstr, &pbdiagrstr)); 8260d0321e0SJeremy L Thompson diag->pbdiagrstr = pbdiagrstr; 8270d0321e0SJeremy L Thompson } 8280d0321e0SJeremy L Thompson CeedElemRestriction diagrstr = pointBlock ? diag->pbdiagrstr : diag->diagrstr; 8290d0321e0SJeremy L Thompson 8300d0321e0SJeremy L Thompson // Create diagonal vector 8310d0321e0SJeremy L Thompson CeedVector elemdiag = pointBlock ? diag->pbelemdiag : diag->elemdiag; 8320d0321e0SJeremy L Thompson if (!elemdiag) { 8330d0321e0SJeremy L Thompson // Element diagonal vector 834*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionCreateVector(diagrstr, NULL, &elemdiag)); 835*2b730f8bSJeremy L Thompson if (pointBlock) diag->pbelemdiag = elemdiag; 836*2b730f8bSJeremy L Thompson else diag->elemdiag = elemdiag; 8370d0321e0SJeremy L Thompson } 838*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetValue(elemdiag, 0.0)); 8390d0321e0SJeremy L Thompson 8400d0321e0SJeremy L Thompson // Assemble element operator diagonals 8410d0321e0SJeremy L Thompson CeedScalar *elemdiagarray; 8420d0321e0SJeremy L Thompson const CeedScalar *assembledqfarray; 843*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(elemdiag, CEED_MEM_DEVICE, &elemdiagarray)); 844*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(assembledqf, CEED_MEM_DEVICE, &assembledqfarray)); 8450d0321e0SJeremy L Thompson CeedInt nelem; 846*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(diagrstr, &nelem)); 8470d0321e0SJeremy L Thompson 8480d0321e0SJeremy L Thompson // Compute the diagonal of B^T D B 8490d0321e0SJeremy L Thompson int elemsPerBlock = 1; 8500d0321e0SJeremy L Thompson int grid = nelem / elemsPerBlock + ((nelem / elemsPerBlock * elemsPerBlock < nelem) ? 1 : 0); 851*2b730f8bSJeremy L Thompson void *args[] = {(void *)&nelem, &diag->d_identity, &diag->d_interpin, &diag->d_gradin, &diag->d_interpout, 852*2b730f8bSJeremy L Thompson &diag->d_gradout, &diag->d_emodein, &diag->d_emodeout, &assembledqfarray, &elemdiagarray}; 8530d0321e0SJeremy L Thompson if (pointBlock) { 854*2b730f8bSJeremy L Thompson CeedCallBackend(CeedRunKernelDimHip(ceed, diag->linearPointBlock, grid, diag->nnodes, 1, elemsPerBlock, args)); 8550d0321e0SJeremy L Thompson } else { 856*2b730f8bSJeremy L Thompson CeedCallBackend(CeedRunKernelDimHip(ceed, diag->linearDiagonal, grid, diag->nnodes, 1, elemsPerBlock, args)); 8570d0321e0SJeremy L Thompson } 8580d0321e0SJeremy L Thompson 8590d0321e0SJeremy L Thompson // Restore arrays 860*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(elemdiag, &elemdiagarray)); 861*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(assembledqf, &assembledqfarray)); 8620d0321e0SJeremy L Thompson 8630d0321e0SJeremy L Thompson // Assemble local operator diagonal 864*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApply(diagrstr, CEED_TRANSPOSE, elemdiag, assembled, request)); 8650d0321e0SJeremy L Thompson 8660d0321e0SJeremy L Thompson // Cleanup 867*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&assembledqf)); 8680d0321e0SJeremy L Thompson 8690d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 8700d0321e0SJeremy L Thompson } 8710d0321e0SJeremy L Thompson 8720d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8730d0321e0SJeremy L Thompson // Assemble Linear Diagonal 8740d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 875*2b730f8bSJeremy L Thompson static int CeedOperatorLinearAssembleAddDiagonal_Hip(CeedOperator op, CeedVector assembled, CeedRequest *request) { 876*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorAssembleDiagonalCore_Hip(op, assembled, request, false)); 8776aa95790SJeremy L Thompson return CEED_ERROR_SUCCESS; 8780d0321e0SJeremy L Thompson } 8790d0321e0SJeremy L Thompson 8800d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8810d0321e0SJeremy L Thompson // Assemble Linear Point Block Diagonal 8820d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 883*2b730f8bSJeremy L Thompson static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip(CeedOperator op, CeedVector assembled, CeedRequest *request) { 884*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorAssembleDiagonalCore_Hip(op, assembled, request, true)); 8856aa95790SJeremy L Thompson return CEED_ERROR_SUCCESS; 8860d0321e0SJeremy L Thompson } 8870d0321e0SJeremy L Thompson 888a835093fSnbeams //------------------------------------------------------------------------------ 889a835093fSnbeams // Single operator assembly setup 890a835093fSnbeams //------------------------------------------------------------------------------ 891a835093fSnbeams static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op) { 892a835093fSnbeams Ceed ceed; 893*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 894a835093fSnbeams CeedOperator_Hip *impl; 895*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 896a835093fSnbeams 897a835093fSnbeams // Get intput and output fields 898a835093fSnbeams CeedInt num_input_fields, num_output_fields; 899a835093fSnbeams CeedOperatorField *input_fields; 900a835093fSnbeams CeedOperatorField *output_fields; 901*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 902a835093fSnbeams 903a835093fSnbeams // Determine active input basis eval mode 904a835093fSnbeams CeedQFunction qf; 905*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 906a835093fSnbeams CeedQFunctionField *qf_fields; 907*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 908a835093fSnbeams // Note that the kernel will treat each dimension of a gradient action separately; 909a835093fSnbeams // i.e., when an active input has a CEED_EVAL_GRAD mode, num_emode_in will increment 910a835093fSnbeams // by dim. However, for the purposes of loading the B matrices, it will be treated 911a835093fSnbeams // as one mode, and we will load/copy the entire gradient matrix at once, so 912a835093fSnbeams // num_B_in_mats_to_load will be incremented by 1. 913a835093fSnbeams CeedInt num_emode_in = 0, dim = 1, num_B_in_mats_to_load = 0, size_B_in = 0; 914a835093fSnbeams CeedEvalMode *eval_mode_in = NULL; // will be of size num_B_in_mats_load 915a835093fSnbeams CeedBasis basis_in = NULL; 916b216396cSJeremy L Thompson CeedInt nqpts = 0, esize = 0; 917a835093fSnbeams CeedElemRestriction rstr_in = NULL; 918a835093fSnbeams for (CeedInt i = 0; i < num_input_fields; i++) { 919a835093fSnbeams CeedVector vec; 920*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(input_fields[i], &vec)); 921a835093fSnbeams if (vec == CEED_VECTOR_ACTIVE) { 922*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(input_fields[i], &basis_in)); 923*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis_in, &dim)); 924*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &nqpts)); 925*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in)); 926*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr_in, &esize)); 927a835093fSnbeams CeedEvalMode eval_mode; 928*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 929a835093fSnbeams if (eval_mode != CEED_EVAL_NONE) { 930*2b730f8bSJeremy L Thompson CeedCallBackend(CeedRealloc(num_B_in_mats_to_load + 1, &eval_mode_in)); 931a835093fSnbeams eval_mode_in[num_B_in_mats_to_load] = eval_mode; 932a835093fSnbeams num_B_in_mats_to_load += 1; 933a835093fSnbeams if (eval_mode == CEED_EVAL_GRAD) { 934a835093fSnbeams num_emode_in += dim; 935a835093fSnbeams size_B_in += dim * esize * nqpts; 936a835093fSnbeams } else { 937a835093fSnbeams num_emode_in += 1; 938a835093fSnbeams size_B_in += esize * nqpts; 939a835093fSnbeams } 940a835093fSnbeams } 941a835093fSnbeams } 942a835093fSnbeams } 943a835093fSnbeams 944a835093fSnbeams // Determine active output basis; basis_out and rstr_out only used if same as input, TODO 945*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields)); 946a835093fSnbeams CeedInt num_emode_out = 0, num_B_out_mats_to_load = 0, size_B_out = 0; 947a835093fSnbeams CeedEvalMode *eval_mode_out = NULL; 948a835093fSnbeams CeedBasis basis_out = NULL; 949a835093fSnbeams CeedElemRestriction rstr_out = NULL; 950a835093fSnbeams for (CeedInt i = 0; i < num_output_fields; i++) { 951a835093fSnbeams CeedVector vec; 952*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(output_fields[i], &vec)); 953a835093fSnbeams if (vec == CEED_VECTOR_ACTIVE) { 954*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(output_fields[i], &basis_out)); 955*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out)); 956*2b730f8bSJeremy L Thompson if (rstr_out && rstr_out != rstr_in) { 957a835093fSnbeams // LCOV_EXCL_START 958*2b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Backend does not implement multi-field non-composite operator assembly"); 959a835093fSnbeams // LCOV_EXCL_STOP 960*2b730f8bSJeremy L Thompson } 961a835093fSnbeams CeedEvalMode eval_mode; 962*2b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 963a835093fSnbeams if (eval_mode != CEED_EVAL_NONE) { 964*2b730f8bSJeremy L Thompson CeedCallBackend(CeedRealloc(num_B_out_mats_to_load + 1, &eval_mode_out)); 965a835093fSnbeams eval_mode_out[num_B_out_mats_to_load] = eval_mode; 966a835093fSnbeams num_B_out_mats_to_load += 1; 967a835093fSnbeams if (eval_mode == CEED_EVAL_GRAD) { 968a835093fSnbeams num_emode_out += dim; 969a835093fSnbeams size_B_out += dim * esize * nqpts; 970a835093fSnbeams } else { 971a835093fSnbeams num_emode_out += 1; 972a835093fSnbeams size_B_out += esize * nqpts; 973a835093fSnbeams } 974a835093fSnbeams } 975a835093fSnbeams } 976a835093fSnbeams } 977a835093fSnbeams 978*2b730f8bSJeremy L Thompson if (num_emode_in == 0 || num_emode_out == 0) { 979a835093fSnbeams // LCOV_EXCL_START 980*2b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); 981a835093fSnbeams // LCOV_EXCL_STOP 982*2b730f8bSJeremy L Thompson } 983a835093fSnbeams 984a835093fSnbeams CeedInt nelem, ncomp; 985*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr_in, &nelem)); 986*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_in, &ncomp)); 987a835093fSnbeams 988*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl->asmb)); 989a835093fSnbeams CeedOperatorAssemble_Hip *asmb = impl->asmb; 990a835093fSnbeams asmb->nelem = nelem; 991a835093fSnbeams 992a835093fSnbeams // Compile kernels 993a835093fSnbeams int elemsPerBlock = 1; 994a835093fSnbeams asmb->elemsPerBlock = elemsPerBlock; 99559ad764aSnbeams CeedInt block_size = esize * esize * elemsPerBlock; 99607b31e0eSJeremy L Thompson char *assembly_kernel_path, *assembly_kernel_source; 997*2b730f8bSJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-operator-assemble.h", &assembly_kernel_path)); 99807b31e0eSJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Assembly Kernel Source -----\n"); 999*2b730f8bSJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, assembly_kernel_path, &assembly_kernel_source)); 100007b31e0eSJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Assembly Source Complete! -----\n"); 100107b31e0eSJeremy L Thompson bool fallback = block_size > 1024; 100207b31e0eSJeremy L Thompson if (fallback) { // Use fallback kernel with 1D threadblock 100359ad764aSnbeams block_size = esize * elemsPerBlock; 100459ad764aSnbeams asmb->block_size_x = esize; 100559ad764aSnbeams asmb->block_size_y = 1; 100659ad764aSnbeams } else { // Use kernel with 2D threadblock 100759ad764aSnbeams asmb->block_size_x = esize; 100859ad764aSnbeams asmb->block_size_y = esize; 100907b31e0eSJeremy L Thompson } 1010*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCompileHip(ceed, assembly_kernel_source, &asmb->module, 7, "NELEM", nelem, "NUMEMODEIN", num_emode_in, "NUMEMODEOUT", 1011*2b730f8bSJeremy L Thompson num_emode_out, "NQPTS", nqpts, "NNODES", esize, "BLOCK_SIZE", block_size, "NCOMP", ncomp)); 1012*2b730f8bSJeremy L Thompson CeedCallBackend(CeedGetKernelHip(ceed, asmb->module, fallback ? "linearAssembleFallback" : "linearAssemble", &asmb->linearAssemble)); 1013*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&assembly_kernel_path)); 1014*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&assembly_kernel_source)); 1015a835093fSnbeams 1016a835093fSnbeams // Build 'full' B matrices (not 1D arrays used for tensor-product matrices) 1017a835093fSnbeams const CeedScalar *interp_in, *grad_in; 1018*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetInterp(basis_in, &interp_in)); 1019*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetGrad(basis_in, &grad_in)); 1020a835093fSnbeams 1021a835093fSnbeams // Load into B_in, in order that they will be used in eval_mode 1022a835093fSnbeams const CeedInt inBytes = size_B_in * sizeof(CeedScalar); 1023a835093fSnbeams CeedInt mat_start = 0; 1024*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&asmb->d_B_in, inBytes)); 1025a835093fSnbeams for (int i = 0; i < num_B_in_mats_to_load; i++) { 1026a835093fSnbeams CeedEvalMode eval_mode = eval_mode_in[i]; 1027a835093fSnbeams if (eval_mode == CEED_EVAL_INTERP) { 1028*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(&asmb->d_B_in[mat_start], interp_in, esize * nqpts * sizeof(CeedScalar), hipMemcpyHostToDevice)); 1029a835093fSnbeams mat_start += esize * nqpts; 1030a835093fSnbeams } else if (eval_mode == CEED_EVAL_GRAD) { 1031*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(&asmb->d_B_in[mat_start], grad_in, dim * esize * nqpts * sizeof(CeedScalar), hipMemcpyHostToDevice)); 1032a835093fSnbeams mat_start += dim * esize * nqpts; 1033a835093fSnbeams } 1034a835093fSnbeams } 1035a835093fSnbeams 1036a835093fSnbeams const CeedScalar *interp_out, *grad_out; 1037a835093fSnbeams // Note that this function currently assumes 1 basis, so this should always be true 1038a835093fSnbeams // for now 1039a835093fSnbeams if (basis_out == basis_in) { 1040a835093fSnbeams interp_out = interp_in; 1041a835093fSnbeams grad_out = grad_in; 1042a835093fSnbeams } else { 1043*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetInterp(basis_out, &interp_out)); 1044*2b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetGrad(basis_out, &grad_out)); 1045a835093fSnbeams } 1046a835093fSnbeams 1047a835093fSnbeams // Load into B_out, in order that they will be used in eval_mode 1048a835093fSnbeams const CeedInt outBytes = size_B_out * sizeof(CeedScalar); 1049a835093fSnbeams mat_start = 0; 1050*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&asmb->d_B_out, outBytes)); 1051a835093fSnbeams for (int i = 0; i < num_B_out_mats_to_load; i++) { 1052a835093fSnbeams CeedEvalMode eval_mode = eval_mode_out[i]; 1053a835093fSnbeams if (eval_mode == CEED_EVAL_INTERP) { 1054*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(&asmb->d_B_out[mat_start], interp_out, esize * nqpts * sizeof(CeedScalar), hipMemcpyHostToDevice)); 1055a835093fSnbeams mat_start += esize * nqpts; 1056a835093fSnbeams } else if (eval_mode == CEED_EVAL_GRAD) { 1057*2b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(&asmb->d_B_out[mat_start], grad_out, dim * esize * nqpts * sizeof(CeedScalar), hipMemcpyHostToDevice)); 1058a835093fSnbeams mat_start += dim * esize * nqpts; 1059a835093fSnbeams } 1060a835093fSnbeams } 1061a835093fSnbeams return CEED_ERROR_SUCCESS; 1062a835093fSnbeams } 1063a835093fSnbeams 1064a835093fSnbeams //------------------------------------------------------------------------------ 1065cefa2673SJeremy L Thompson // Assemble matrix data for COO matrix of assembled operator. 1066cefa2673SJeremy L Thompson // The sparsity pattern is set by CeedOperatorLinearAssembleSymbolic. 1067cefa2673SJeremy L Thompson // 1068cefa2673SJeremy L Thompson // Note that this (and other assembly routines) currently assume only one 1069cefa2673SJeremy L Thompson // active input restriction/basis per operator (could have multiple basis eval 1070cefa2673SJeremy L Thompson // modes). 1071cefa2673SJeremy L Thompson // TODO: allow multiple active input restrictions/basis objects 1072a835093fSnbeams //------------------------------------------------------------------------------ 1073*2b730f8bSJeremy L Thompson static int CeedSingleOperatorAssemble_Hip(CeedOperator op, CeedInt offset, CeedVector values) { 1074a835093fSnbeams Ceed ceed; 1075*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 1076a835093fSnbeams CeedOperator_Hip *impl; 1077*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 1078a835093fSnbeams 1079a835093fSnbeams // Setup 1080a835093fSnbeams if (!impl->asmb) { 1081*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSingleOperatorAssembleSetup_Hip(op)); 1082b216396cSJeremy L Thompson assert(impl->asmb != NULL); 1083a835093fSnbeams } 1084a835093fSnbeams 1085a835093fSnbeams // Assemble QFunction 1086a835093fSnbeams CeedVector assembled_qf; 1087a835093fSnbeams CeedElemRestriction rstr_q; 1088*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 1089*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionDestroy(&rstr_q)); 1090a835093fSnbeams CeedScalar *values_array; 1091*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(values, CEED_MEM_DEVICE, &values_array)); 1092a835093fSnbeams values_array += offset; 1093a835093fSnbeams const CeedScalar *qf_array; 1094*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_DEVICE, &qf_array)); 1095a835093fSnbeams 1096a835093fSnbeams // Compute B^T D B 1097b216396cSJeremy L Thompson const CeedInt nelem = impl->asmb->nelem; // to satisfy clang-tidy 1098a835093fSnbeams const CeedInt elemsPerBlock = impl->asmb->elemsPerBlock; 1099*2b730f8bSJeremy L Thompson const CeedInt grid = nelem / elemsPerBlock + ((nelem / elemsPerBlock * elemsPerBlock < nelem) ? 1 : 0); 1100*2b730f8bSJeremy L Thompson void *args[] = {&impl->asmb->d_B_in, &impl->asmb->d_B_out, &qf_array, &values_array}; 1101*2b730f8bSJeremy L Thompson CeedCallBackend( 1102*2b730f8bSJeremy L Thompson CeedRunKernelDimHip(ceed, impl->asmb->linearAssemble, grid, impl->asmb->block_size_x, impl->asmb->block_size_y, elemsPerBlock, args)); 1103a835093fSnbeams 1104a835093fSnbeams // Restore arrays 1105*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(values, &values_array)); 1106*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(assembled_qf, &qf_array)); 1107a835093fSnbeams 1108a835093fSnbeams // Cleanup 1109*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&assembled_qf)); 1110a835093fSnbeams 1111a835093fSnbeams return CEED_ERROR_SUCCESS; 1112a835093fSnbeams } 1113a835093fSnbeams 1114a835093fSnbeams //------------------------------------------------------------------------------ 11150d0321e0SJeremy L Thompson // Create operator 11160d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 11170d0321e0SJeremy L Thompson int CeedOperatorCreate_Hip(CeedOperator op) { 11180d0321e0SJeremy L Thompson Ceed ceed; 1119*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 11200d0321e0SJeremy L Thompson CeedOperator_Hip *impl; 11210d0321e0SJeremy L Thompson 1122*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 1123*2b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetData(op, impl)); 11240d0321e0SJeremy L Thompson 1125*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", CeedOperatorLinearAssembleQFunction_Hip)); 1126*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunctionUpdate", CeedOperatorLinearAssembleQFunctionUpdate_Hip)); 1127*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal", CeedOperatorLinearAssembleAddDiagonal_Hip)); 1128*2b730f8bSJeremy L Thompson CeedCallBackend( 1129*2b730f8bSJeremy L Thompson CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddPointBlockDiagonal", CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip)); 1130*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleSingle", CeedSingleOperatorAssemble_Hip)); 1131*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", CeedOperatorApplyAdd_Hip)); 1132*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "Destroy", CeedOperatorDestroy_Hip)); 11330d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 11340d0321e0SJeremy L Thompson } 11350d0321e0SJeremy L Thompson 11360d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1137