13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 849aac155SJeremy L Thompson #include <ceed.h> 92b730f8bSJeremy L Thompson #include <ceed/backend.h> 102b730f8bSJeremy L Thompson #include <ceed/jit-tools.h> 11c85e8640SSebastian Grimberg #include <assert.h> 120d0321e0SJeremy L Thompson #include <cuda.h> 130d0321e0SJeremy L Thompson #include <cuda_runtime.h> 140d0321e0SJeremy L Thompson #include <stdbool.h> 150d0321e0SJeremy L Thompson #include <string.h> 162b730f8bSJeremy L Thompson 1749aac155SJeremy L Thompson #include "../cuda/ceed-cuda-common.h" 180d0321e0SJeremy L Thompson #include "../cuda/ceed-cuda-compile.h" 192b730f8bSJeremy L Thompson #include "ceed-cuda-ref.h" 200d0321e0SJeremy L Thompson 210d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 220d0321e0SJeremy L Thompson // Destroy operator 230d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 240d0321e0SJeremy L Thompson static int CeedOperatorDestroy_Cuda(CeedOperator op) { 250d0321e0SJeremy L Thompson CeedOperator_Cuda *impl; 26ca735530SJeremy L Thompson 272b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 280d0321e0SJeremy L Thompson 290d0321e0SJeremy L Thompson // Apply data 30ca735530SJeremy L Thompson for (CeedInt i = 0; i < impl->num_inputs + impl->num_outputs; i++) { 31ca735530SJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->e_vecs[i])); 320d0321e0SJeremy L Thompson } 33ca735530SJeremy L Thompson CeedCallBackend(CeedFree(&impl->e_vecs)); 340d0321e0SJeremy L Thompson 35ca735530SJeremy L Thompson for (CeedInt i = 0; i < impl->num_inputs; i++) { 36ca735530SJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->q_vecs_in[i])); 370d0321e0SJeremy L Thompson } 38ca735530SJeremy L Thompson CeedCallBackend(CeedFree(&impl->q_vecs_in)); 390d0321e0SJeremy L Thompson 40ca735530SJeremy L Thompson for (CeedInt i = 0; i < impl->num_outputs; i++) { 41ca735530SJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->q_vecs_out[i])); 420d0321e0SJeremy L Thompson } 43ca735530SJeremy L Thompson CeedCallBackend(CeedFree(&impl->q_vecs_out)); 440d0321e0SJeremy L Thompson 450d0321e0SJeremy L Thompson // QFunction assembly data 46ca735530SJeremy L Thompson for (CeedInt i = 0; i < impl->num_active_in; i++) { 47ca735530SJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->qf_active_in[i])); 480d0321e0SJeremy L Thompson } 49ca735530SJeremy L Thompson CeedCallBackend(CeedFree(&impl->qf_active_in)); 500d0321e0SJeremy L Thompson 510d0321e0SJeremy L Thompson // Diag data 520d0321e0SJeremy L Thompson if (impl->diag) { 530d0321e0SJeremy L Thompson Ceed ceed; 54ca735530SJeremy L Thompson 552b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 562b730f8bSJeremy L Thompson CeedCallCuda(ceed, cuModuleUnload(impl->diag->module)); 57ca735530SJeremy L Thompson CeedCallBackend(CeedFree(&impl->diag->h_e_mode_in)); 58ca735530SJeremy L Thompson CeedCallBackend(CeedFree(&impl->diag->h_e_mode_out)); 59ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaFree(impl->diag->d_e_mode_in)); 60ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaFree(impl->diag->d_e_mode_out)); 612b730f8bSJeremy L Thompson CeedCallCuda(ceed, cudaFree(impl->diag->d_identity)); 62ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaFree(impl->diag->d_interp_in)); 63ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaFree(impl->diag->d_interp_out)); 64ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaFree(impl->diag->d_grad_in)); 65ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaFree(impl->diag->d_grad_out)); 66506b1a0cSSebastian Grimberg CeedCallBackend(CeedElemRestrictionDestroy(&impl->diag->point_block_diag_rstr)); 67ca735530SJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->diag->elem_diag)); 68ca735530SJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&impl->diag->point_block_elem_diag)); 690d0321e0SJeremy L Thompson } 702b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->diag)); 710d0321e0SJeremy L Thompson 72cc132f9aSnbeams if (impl->asmb) { 73cc132f9aSnbeams Ceed ceed; 74ca735530SJeremy L Thompson 752b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 762b730f8bSJeremy L Thompson CeedCallCuda(ceed, cuModuleUnload(impl->asmb->module)); 772b730f8bSJeremy L Thompson CeedCallCuda(ceed, cudaFree(impl->asmb->d_B_in)); 782b730f8bSJeremy L Thompson CeedCallCuda(ceed, cudaFree(impl->asmb->d_B_out)); 79cc132f9aSnbeams } 802b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->asmb)); 81cc132f9aSnbeams 822b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 830d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 840d0321e0SJeremy L Thompson } 850d0321e0SJeremy L Thompson 860d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 870d0321e0SJeremy L Thompson // Setup infields or outfields 880d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 89ca735530SJeremy L Thompson static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, bool is_input, CeedVector *e_vecs, CeedVector *q_vecs, CeedInt e_start, 90ca735530SJeremy L Thompson CeedInt num_fields, CeedInt Q, CeedInt num_elem) { 910d0321e0SJeremy L Thompson Ceed ceed; 92ca735530SJeremy L Thompson bool is_strided, skip_restriction; 93ca735530SJeremy L Thompson CeedSize q_size; 94ca735530SJeremy L Thompson CeedInt dim, size; 95ca735530SJeremy L Thompson CeedQFunctionField *qf_fields; 96ca735530SJeremy L Thompson CeedOperatorField *op_fields; 970d0321e0SJeremy L Thompson 98ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 99ca735530SJeremy L Thompson 100ca735530SJeremy L Thompson if (is_input) { 101ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 102ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 1030d0321e0SJeremy L Thompson } else { 104ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 105ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields)); 1060d0321e0SJeremy L Thompson } 1070d0321e0SJeremy L Thompson 1080d0321e0SJeremy L Thompson // Loop over fields 109ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_fields; i++) { 110ca735530SJeremy L Thompson CeedEvalMode e_mode; 111ca735530SJeremy L Thompson CeedBasis basis; 1120d0321e0SJeremy L Thompson 113ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &e_mode)); 114ca735530SJeremy L Thompson 115ca735530SJeremy L Thompson is_strided = false; 116ca735530SJeremy L Thompson skip_restriction = false; 117ca735530SJeremy L Thompson if (e_mode != CEED_EVAL_WEIGHT) { 118edb2538eSJeremy L Thompson CeedElemRestriction elem_rstr; 119ca735530SJeremy L Thompson 120edb2538eSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr)); 1210d0321e0SJeremy L Thompson 1220d0321e0SJeremy L Thompson // Check whether this field can skip the element restriction: 123ca735530SJeremy L Thompson // must be passive input, with e_mode NONE, and have a strided restriction with CEED_STRIDES_BACKEND. 1240d0321e0SJeremy L Thompson 1250d0321e0SJeremy L Thompson // First, check whether the field is input or output: 126ca735530SJeremy L Thompson if (is_input) { 127ca735530SJeremy L Thompson CeedVector vec; 128ca735530SJeremy L Thompson 1290d0321e0SJeremy L Thompson // Check for passive input: 130ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); 131ca735530SJeremy L Thompson if (vec != CEED_VECTOR_ACTIVE) { 132ca735530SJeremy L Thompson // Check e_mode 133ca735530SJeremy L Thompson if (e_mode == CEED_EVAL_NONE) { 1340d0321e0SJeremy L Thompson // Check for strided restriction 135edb2538eSJeremy L Thompson CeedCallBackend(CeedElemRestrictionIsStrided(elem_rstr, &is_strided)); 136ca735530SJeremy L Thompson if (is_strided) { 1370d0321e0SJeremy L Thompson // Check if vector is already in preferred backend ordering 138edb2538eSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(elem_rstr, &skip_restriction)); 1390d0321e0SJeremy L Thompson } 1400d0321e0SJeremy L Thompson } 1410d0321e0SJeremy L Thompson } 1420d0321e0SJeremy L Thompson } 143ca735530SJeremy L Thompson if (skip_restriction) { 144ea61e9acSJeremy L Thompson // We do not need an E-Vector, but will use the input field vector's data directly in the operator application. 145ca735530SJeremy L Thompson e_vecs[i + e_start] = NULL; 1460d0321e0SJeremy L Thompson } else { 147edb2538eSJeremy L Thompson CeedCallBackend(CeedElemRestrictionCreateVector(elem_rstr, NULL, &e_vecs[i + e_start])); 1480d0321e0SJeremy L Thompson } 1490d0321e0SJeremy L Thompson } 1500d0321e0SJeremy L Thompson 151ca735530SJeremy L Thompson switch (e_mode) { 1520d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 153ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size)); 154ca735530SJeremy L Thompson q_size = (CeedSize)num_elem * Q * size; 155ca735530SJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); 1560d0321e0SJeremy L Thompson break; 1570d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 158ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size)); 159ca735530SJeremy L Thompson q_size = (CeedSize)num_elem * Q * size; 160ca735530SJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); 1610d0321e0SJeremy L Thompson break; 1620d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 163ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 164ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size)); 1652b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 166ca735530SJeremy L Thompson q_size = (CeedSize)num_elem * Q * size; 167ca735530SJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); 1680d0321e0SJeremy L Thompson break; 1690d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: // Only on input fields 170ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 171ca735530SJeremy L Thompson q_size = (CeedSize)num_elem * Q; 172ca735530SJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); 173ca735530SJeremy L Thompson CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i])); 1740d0321e0SJeremy L Thompson break; 1750d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 1760d0321e0SJeremy L Thompson break; // TODO: Not implemented 1770d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 1780d0321e0SJeremy L Thompson break; // TODO: Not implemented 1790d0321e0SJeremy L Thompson } 1800d0321e0SJeremy L Thompson } 1810d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1820d0321e0SJeremy L Thompson } 1830d0321e0SJeremy L Thompson 1840d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 185ea61e9acSJeremy L Thompson // CeedOperator needs to connect all the named fields (be they active or passive) to the named inputs and outputs of its CeedQFunction. 1860d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1870d0321e0SJeremy L Thompson static int CeedOperatorSetup_Cuda(CeedOperator op) { 1880d0321e0SJeremy L Thompson Ceed ceed; 189ca735530SJeremy L Thompson bool is_setup_done; 190ca735530SJeremy L Thompson CeedInt Q, num_elem, num_input_fields, num_output_fields; 191ca735530SJeremy L Thompson CeedQFunctionField *qf_input_fields, *qf_output_fields; 1920d0321e0SJeremy L Thompson CeedQFunction qf; 193ca735530SJeremy L Thompson CeedOperatorField *op_input_fields, *op_output_fields; 194ca735530SJeremy L Thompson CeedOperator_Cuda *impl; 195ca735530SJeremy L Thompson 196ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorIsSetupDone(op, &is_setup_done)); 197ca735530SJeremy L Thompson if (is_setup_done) return CEED_ERROR_SUCCESS; 198ca735530SJeremy L Thompson 199ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 200ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 2012b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 2022b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q)); 203ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem)); 204ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields)); 205ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields)); 2060d0321e0SJeremy L Thompson 2070d0321e0SJeremy L Thompson // Allocate 208ca735530SJeremy L Thompson CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs)); 2090d0321e0SJeremy L Thompson 210ca735530SJeremy L Thompson CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in)); 211ca735530SJeremy L Thompson CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out)); 2120d0321e0SJeremy L Thompson 213ca735530SJeremy L Thompson impl->num_inputs = num_input_fields; 214ca735530SJeremy L Thompson impl->num_outputs = num_output_fields; 2150d0321e0SJeremy L Thompson 216ca735530SJeremy L Thompson // Set up infield and outfield e_vecs and q_vecs 2170d0321e0SJeremy L Thompson // Infields 218ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorSetupFields_Cuda(qf, op, true, impl->e_vecs, impl->q_vecs_in, 0, num_input_fields, Q, num_elem)); 2190d0321e0SJeremy L Thompson // Outfields 220ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorSetupFields_Cuda(qf, op, false, impl->e_vecs, impl->q_vecs_out, num_input_fields, num_output_fields, Q, num_elem)); 2210d0321e0SJeremy L Thompson 2222b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetSetupDone(op)); 2230d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2240d0321e0SJeremy L Thompson } 2250d0321e0SJeremy L Thompson 2260d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2270d0321e0SJeremy L Thompson // Setup Operator Inputs 2280d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 229ca735530SJeremy L Thompson static inline int CeedOperatorSetupInputs_Cuda(CeedInt num_input_fields, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 230ca735530SJeremy L Thompson CeedVector in_vec, const bool skip_active_in, CeedScalar *e_data[2 * CEED_FIELD_MAX], 2310d0321e0SJeremy L Thompson CeedOperator_Cuda *impl, CeedRequest *request) { 232ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_input_fields; i++) { 233ca735530SJeremy L Thompson CeedEvalMode e_mode; 2340d0321e0SJeremy L Thompson CeedVector vec; 235edb2538eSJeremy L Thompson CeedElemRestriction elem_rstr; 2360d0321e0SJeremy L Thompson 2370d0321e0SJeremy L Thompson // Get input vector 238ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); 2390d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 240ca735530SJeremy L Thompson if (skip_active_in) continue; 241ca735530SJeremy L Thompson else vec = in_vec; 2420d0321e0SJeremy L Thompson } 2430d0321e0SJeremy L Thompson 244ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &e_mode)); 245ca735530SJeremy L Thompson if (e_mode == CEED_EVAL_WEIGHT) { // Skip 2460d0321e0SJeremy L Thompson } else { 2470d0321e0SJeremy L Thompson // Get input element restriction 248edb2538eSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); 249ca735530SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) vec = in_vec; 2500d0321e0SJeremy L Thompson // Restrict, if necessary 251ca735530SJeremy L Thompson if (!impl->e_vecs[i]) { 2520d0321e0SJeremy L Thompson // No restriction for this field; read data directly from vec. 253ca735530SJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, (const CeedScalar **)&e_data[i])); 2540d0321e0SJeremy L Thompson } else { 255edb2538eSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, vec, impl->e_vecs[i], request)); 2560d0321e0SJeremy L Thompson // Get evec 257ca735530SJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_DEVICE, (const CeedScalar **)&e_data[i])); 2580d0321e0SJeremy L Thompson } 2590d0321e0SJeremy L Thompson } 2600d0321e0SJeremy L Thompson } 2610d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2620d0321e0SJeremy L Thompson } 2630d0321e0SJeremy L Thompson 2640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2650d0321e0SJeremy L Thompson // Input Basis Action 2660d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 267ca735530SJeremy L Thompson static inline int CeedOperatorInputBasis_Cuda(CeedInt num_elem, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 268ca735530SJeremy L Thompson CeedInt num_input_fields, const bool skip_active_in, CeedScalar *e_data[2 * CEED_FIELD_MAX], 2692b730f8bSJeremy L Thompson CeedOperator_Cuda *impl) { 270ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_input_fields; i++) { 271ca735530SJeremy L Thompson CeedInt elem_size, size; 272ca735530SJeremy L Thompson CeedEvalMode e_mode; 273edb2538eSJeremy L Thompson CeedElemRestriction elem_rstr; 2740d0321e0SJeremy L Thompson CeedBasis basis; 2750d0321e0SJeremy L Thompson 2760d0321e0SJeremy L Thompson // Skip active input 277ca735530SJeremy L Thompson if (skip_active_in) { 2780d0321e0SJeremy L Thompson CeedVector vec; 279ca735530SJeremy L Thompson 280ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); 2812b730f8bSJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) continue; 2820d0321e0SJeremy L Thompson } 283ca735530SJeremy L Thompson // Get elem_size, e_mode, size 284edb2538eSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); 285edb2538eSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); 286ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &e_mode)); 287ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size)); 2880d0321e0SJeremy L Thompson // Basis action 289ca735530SJeremy L Thompson switch (e_mode) { 2900d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 291ca735530SJeremy L Thompson CeedCallBackend(CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_DEVICE, CEED_USE_POINTER, e_data[i])); 2920d0321e0SJeremy L Thompson break; 2930d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 294ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); 295ca735530SJeremy L Thompson CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, impl->e_vecs[i], impl->q_vecs_in[i])); 2960d0321e0SJeremy L Thompson break; 2970d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 298ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); 299ca735530SJeremy L Thompson CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, impl->e_vecs[i], impl->q_vecs_in[i])); 3000d0321e0SJeremy L Thompson break; 3010d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: 3020d0321e0SJeremy L Thompson break; // No action 3030d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 3040d0321e0SJeremy L Thompson break; // TODO: Not implemented 3050d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 3060d0321e0SJeremy L Thompson break; // TODO: Not implemented 3070d0321e0SJeremy L Thompson } 3080d0321e0SJeremy L Thompson } 3090d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3100d0321e0SJeremy L Thompson } 3110d0321e0SJeremy L Thompson 3120d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3130d0321e0SJeremy L Thompson // Restore Input Vectors 3140d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 315ca735530SJeremy L Thompson static inline int CeedOperatorRestoreInputs_Cuda(CeedInt num_input_fields, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 316ca735530SJeremy L Thompson const bool skip_active_in, CeedScalar *e_data[2 * CEED_FIELD_MAX], CeedOperator_Cuda *impl) { 317ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_input_fields; i++) { 318ca735530SJeremy L Thompson CeedEvalMode e_mode; 3190d0321e0SJeremy L Thompson CeedVector vec; 3200d0321e0SJeremy L Thompson 3210d0321e0SJeremy L Thompson // Skip active input 322ca735530SJeremy L Thompson if (skip_active_in) { 323ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); 3242b730f8bSJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) continue; 3250d0321e0SJeremy L Thompson } 326ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &e_mode)); 327ca735530SJeremy L Thompson if (e_mode == CEED_EVAL_WEIGHT) { // Skip 3280d0321e0SJeremy L Thompson } else { 329ca735530SJeremy L Thompson if (!impl->e_vecs[i]) { // This was a skip_restriction case 330ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); 331ca735530SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(vec, (const CeedScalar **)&e_data[i])); 3320d0321e0SJeremy L Thompson } else { 333ca735530SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(impl->e_vecs[i], (const CeedScalar **)&e_data[i])); 3340d0321e0SJeremy L Thompson } 3350d0321e0SJeremy L Thompson } 3360d0321e0SJeremy L Thompson } 3370d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3380d0321e0SJeremy L Thompson } 3390d0321e0SJeremy L Thompson 3400d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3410d0321e0SJeremy L Thompson // Apply and add to output 3420d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 343ca735530SJeremy L Thompson static int CeedOperatorApplyAdd_Cuda(CeedOperator op, CeedVector in_vec, CeedVector out_vec, CeedRequest *request) { 3440d0321e0SJeremy L Thompson CeedOperator_Cuda *impl; 345ca735530SJeremy L Thompson CeedInt Q, num_elem, elem_size, num_input_fields, num_output_fields, size; 346ca735530SJeremy L Thompson CeedEvalMode e_mode; 347ca735530SJeremy L Thompson CeedScalar *e_data[2 * CEED_FIELD_MAX] = {NULL}; 348ca735530SJeremy L Thompson CeedOperatorField *op_input_fields, *op_output_fields; 349ca735530SJeremy L Thompson CeedQFunctionField *qf_input_fields, *qf_output_fields; 3500d0321e0SJeremy L Thompson CeedQFunction qf; 351ca735530SJeremy L Thompson 352ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 3532b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 3542b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q)); 355ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem)); 356ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields)); 357ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields)); 3580d0321e0SJeremy L Thompson 3590d0321e0SJeremy L Thompson // Setup 3602b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetup_Cuda(op)); 3610d0321e0SJeremy L Thompson 362ca735530SJeremy L Thompson // Input e_vecs and Restriction 363ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorSetupInputs_Cuda(num_input_fields, qf_input_fields, op_input_fields, in_vec, false, e_data, impl, request)); 3640d0321e0SJeremy L Thompson 3650d0321e0SJeremy L Thompson // Input basis apply if needed 366ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorInputBasis_Cuda(num_elem, qf_input_fields, op_input_fields, num_input_fields, false, e_data, impl)); 3670d0321e0SJeremy L Thompson 3680d0321e0SJeremy L Thompson // Output pointers, as necessary 369ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_output_fields; i++) { 370ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &e_mode)); 371ca735530SJeremy L Thompson if (e_mode == CEED_EVAL_NONE) { 3720d0321e0SJeremy L Thompson // Set the output Q-Vector to use the E-Vector data directly. 373ca735530SJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(impl->e_vecs[i + impl->num_inputs], CEED_MEM_DEVICE, &e_data[i + num_input_fields])); 374ca735530SJeremy L Thompson CeedCallBackend(CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_DEVICE, CEED_USE_POINTER, e_data[i + num_input_fields])); 3750d0321e0SJeremy L Thompson } 3760d0321e0SJeremy L Thompson } 3770d0321e0SJeremy L Thompson 3780d0321e0SJeremy L Thompson // Q function 379ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionApply(qf, num_elem * Q, impl->q_vecs_in, impl->q_vecs_out)); 3800d0321e0SJeremy L Thompson 3810d0321e0SJeremy L Thompson // Output basis apply if needed 382ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_output_fields; i++) { 383edb2538eSJeremy L Thompson CeedElemRestriction elem_rstr; 384ca735530SJeremy L Thompson CeedBasis basis; 385ca735530SJeremy L Thompson 386ca735530SJeremy L Thompson // Get elem_size, e_mode, size 387edb2538eSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); 388edb2538eSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); 389ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &e_mode)); 390ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &size)); 3910d0321e0SJeremy L Thompson // Basis action 392ca735530SJeremy L Thompson switch (e_mode) { 3930d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 3940d0321e0SJeremy L Thompson break; 3950d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 396ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis)); 397ca735530SJeremy L Thompson CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_TRANSPOSE, CEED_EVAL_INTERP, impl->q_vecs_out[i], impl->e_vecs[i + impl->num_inputs])); 3980d0321e0SJeremy L Thompson break; 3990d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 400ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis)); 401ca735530SJeremy L Thompson CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_TRANSPOSE, CEED_EVAL_GRAD, impl->q_vecs_out[i], impl->e_vecs[i + impl->num_inputs])); 4020d0321e0SJeremy L Thompson break; 4030d0321e0SJeremy L Thompson // LCOV_EXCL_START 4040d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 4050d0321e0SJeremy L Thompson Ceed ceed; 4062b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 4072b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); 4080d0321e0SJeremy L Thompson break; // Should not occur 4090d0321e0SJeremy L Thompson } 4100d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 4110d0321e0SJeremy L Thompson break; // TODO: Not implemented 4120d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 4130d0321e0SJeremy L Thompson break; // TODO: Not implemented 4140d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 4150d0321e0SJeremy L Thompson } 4160d0321e0SJeremy L Thompson } 4170d0321e0SJeremy L Thompson 4180d0321e0SJeremy L Thompson // Output restriction 419ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_output_fields; i++) { 420ca735530SJeremy L Thompson CeedVector vec; 421edb2538eSJeremy L Thompson CeedElemRestriction elem_rstr; 422ca735530SJeremy L Thompson 4230d0321e0SJeremy L Thompson // Restore evec 424ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &e_mode)); 425ca735530SJeremy L Thompson if (e_mode == CEED_EVAL_NONE) { 426ca735530SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(impl->e_vecs[i + impl->num_inputs], &e_data[i + num_input_fields])); 4270d0321e0SJeremy L Thompson } 4280d0321e0SJeremy L Thompson // Get output vector 429ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); 4300d0321e0SJeremy L Thompson // Restrict 431edb2538eSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); 4320d0321e0SJeremy L Thompson // Active 433ca735530SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) vec = out_vec; 4340d0321e0SJeremy L Thompson 435edb2538eSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, impl->e_vecs[i + impl->num_inputs], vec, request)); 4360d0321e0SJeremy L Thompson } 4370d0321e0SJeremy L Thompson 4380d0321e0SJeremy L Thompson // Restore input arrays 439ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorRestoreInputs_Cuda(num_input_fields, qf_input_fields, op_input_fields, false, e_data, impl)); 4400d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4410d0321e0SJeremy L Thompson } 4420d0321e0SJeremy L Thompson 4430d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4440d0321e0SJeremy L Thompson // Core code for assembling linear QFunction 4450d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4462b730f8bSJeremy L Thompson static inline int CeedOperatorLinearAssembleQFunctionCore_Cuda(CeedOperator op, bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr, 4470d0321e0SJeremy L Thompson CeedRequest *request) { 448ca735530SJeremy L Thompson Ceed ceed, ceed_parent; 449ca735530SJeremy L Thompson CeedInt num_active_in, num_active_out, Q, num_elem, num_input_fields, num_output_fields, size; 450e984cf9aSJeremy L Thompson CeedSize q_size; 451ca735530SJeremy L Thompson CeedScalar *assembled_array, *e_data[2 * CEED_FIELD_MAX] = {NULL}; 452ca735530SJeremy L Thompson CeedVector *active_inputs; 453ca735530SJeremy L Thompson CeedQFunctionField *qf_input_fields, *qf_output_fields; 454ca735530SJeremy L Thompson CeedQFunction qf; 455ca735530SJeremy L Thompson CeedOperatorField *op_input_fields, *op_output_fields; 456ca735530SJeremy L Thompson CeedOperator_Cuda *impl; 457ca735530SJeremy L Thompson 4582b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 459ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetFallbackParentCeed(op, &ceed_parent)); 460e984cf9aSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 461e984cf9aSJeremy L Thompson CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q)); 462ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem)); 463e984cf9aSJeremy L Thompson CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 464ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields)); 465ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields)); 466ca735530SJeremy L Thompson active_inputs = impl->qf_active_in; 467ca735530SJeremy L Thompson num_active_in = impl->num_active_in, num_active_out = impl->num_active_out; 4680d0321e0SJeremy L Thompson 4690d0321e0SJeremy L Thompson // Setup 4702b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetup_Cuda(op)); 4710d0321e0SJeremy L Thompson 472ca735530SJeremy L Thompson // Input e_vecs and Restriction 473ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorSetupInputs_Cuda(num_input_fields, qf_input_fields, op_input_fields, NULL, true, e_data, impl, request)); 4740d0321e0SJeremy L Thompson 4750d0321e0SJeremy L Thompson // Count number of active input fields 476ca735530SJeremy L Thompson if (!num_active_in) { 477ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_input_fields; i++) { 478ca735530SJeremy L Thompson CeedScalar *q_vec_array; 479ca735530SJeremy L Thompson CeedVector vec; 480ca735530SJeremy L Thompson 4810d0321e0SJeremy L Thompson // Get input vector 482ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); 4830d0321e0SJeremy L Thompson // Check if active input 4840d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 485ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size)); 486ca735530SJeremy L Thompson CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); 487ca735530SJeremy L Thompson CeedCallBackend(CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_DEVICE, &q_vec_array)); 488ca735530SJeremy L Thompson CeedCallBackend(CeedRealloc(num_active_in + size, &active_inputs)); 4890d0321e0SJeremy L Thompson for (CeedInt field = 0; field < size; field++) { 490ca735530SJeremy L Thompson q_size = (CeedSize)Q * num_elem; 491ca735530SJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceed, q_size, &active_inputs[num_active_in + field])); 492ca735530SJeremy L Thompson CeedCallBackend( 493ca735530SJeremy L Thompson CeedVectorSetArray(active_inputs[num_active_in + field], CEED_MEM_DEVICE, CEED_USE_POINTER, &q_vec_array[field * Q * num_elem])); 4940d0321e0SJeremy L Thompson } 495ca735530SJeremy L Thompson num_active_in += size; 496ca735530SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(impl->q_vecs_in[i], &q_vec_array)); 4970d0321e0SJeremy L Thompson } 4980d0321e0SJeremy L Thompson } 499ca735530SJeremy L Thompson impl->num_active_in = num_active_in; 500ca735530SJeremy L Thompson impl->qf_active_in = active_inputs; 5010d0321e0SJeremy L Thompson } 5020d0321e0SJeremy L Thompson 5030d0321e0SJeremy L Thompson // Count number of active output fields 504ca735530SJeremy L Thompson if (!num_active_out) { 505ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_output_fields; i++) { 506ca735530SJeremy L Thompson CeedVector vec; 507ca735530SJeremy L Thompson 5080d0321e0SJeremy L Thompson // Get output vector 509ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); 5100d0321e0SJeremy L Thompson // Check if active output 5110d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 512ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &size)); 513ca735530SJeremy L Thompson num_active_out += size; 5140d0321e0SJeremy L Thompson } 5150d0321e0SJeremy L Thompson } 516ca735530SJeremy L Thompson impl->num_active_out = num_active_out; 5170d0321e0SJeremy L Thompson } 5180d0321e0SJeremy L Thompson 5190d0321e0SJeremy L Thompson // Check sizes 520ca735530SJeremy L Thompson CeedCheck(num_active_in > 0 && num_active_out > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); 5210d0321e0SJeremy L Thompson 5220d0321e0SJeremy L Thompson // Build objects if needed 5230d0321e0SJeremy L Thompson if (build_objects) { 5240d0321e0SJeremy L Thompson // Create output restriction 525ca735530SJeremy L Thompson CeedInt strides[3] = {1, num_elem * Q, Q}; /* *NOPAD* */ 526ca735530SJeremy L Thompson CeedCallBackend(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, Q, num_active_in * num_active_out, 527ca735530SJeremy L Thompson num_active_in * num_active_out * num_elem * Q, strides, rstr)); 5280d0321e0SJeremy L Thompson // Create assembled vector 529ca735530SJeremy L Thompson CeedSize l_size = (CeedSize)num_elem * Q * num_active_in * num_active_out; 530ca735530SJeremy L Thompson CeedCallBackend(CeedVectorCreate(ceed_parent, l_size, assembled)); 5310d0321e0SJeremy L Thompson } 5322b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetValue(*assembled, 0.0)); 533ca735530SJeremy L Thompson CeedCallBackend(CeedVectorGetArray(*assembled, CEED_MEM_DEVICE, &assembled_array)); 5340d0321e0SJeremy L Thompson 5350d0321e0SJeremy L Thompson // Input basis apply 536ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorInputBasis_Cuda(num_elem, qf_input_fields, op_input_fields, num_input_fields, true, e_data, impl)); 5370d0321e0SJeremy L Thompson 5380d0321e0SJeremy L Thompson // Assemble QFunction 539ca735530SJeremy L Thompson for (CeedInt in = 0; in < num_active_in; in++) { 5400d0321e0SJeremy L Thompson // Set Inputs 541ca735530SJeremy L Thompson CeedCallBackend(CeedVectorSetValue(active_inputs[in], 1.0)); 542ca735530SJeremy L Thompson if (num_active_in > 1) { 543ca735530SJeremy L Thompson CeedCallBackend(CeedVectorSetValue(active_inputs[(in + num_active_in - 1) % num_active_in], 0.0)); 5440d0321e0SJeremy L Thompson } 5450d0321e0SJeremy L Thompson // Set Outputs 546ca735530SJeremy L Thompson for (CeedInt out = 0; out < num_output_fields; out++) { 547ca735530SJeremy L Thompson CeedVector vec; 548ca735530SJeremy L Thompson 5490d0321e0SJeremy L Thompson // Get output vector 550ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); 5510d0321e0SJeremy L Thompson // Check if active output 5520d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 553ca735530SJeremy L Thompson CeedCallBackend(CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_DEVICE, CEED_USE_POINTER, assembled_array)); 554ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[out], &size)); 555ca735530SJeremy L Thompson assembled_array += size * Q * num_elem; // Advance the pointer by the size of the output 5560d0321e0SJeremy L Thompson } 5570d0321e0SJeremy L Thompson } 5580d0321e0SJeremy L Thompson // Apply QFunction 559ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionApply(qf, Q * num_elem, impl->q_vecs_in, impl->q_vecs_out)); 5600d0321e0SJeremy L Thompson } 5610d0321e0SJeremy L Thompson 562ca735530SJeremy L Thompson // Un-set output q_vecs to prevent accidental overwrite of Assembled 563ca735530SJeremy L Thompson for (CeedInt out = 0; out < num_output_fields; out++) { 564ca735530SJeremy L Thompson CeedVector vec; 565ca735530SJeremy L Thompson 5660d0321e0SJeremy L Thompson // Get output vector 567ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); 5680d0321e0SJeremy L Thompson // Check if active output 5690d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 570ca735530SJeremy L Thompson CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[out], CEED_MEM_DEVICE, NULL)); 5710d0321e0SJeremy L Thompson } 5720d0321e0SJeremy L Thompson } 5730d0321e0SJeremy L Thompson 5740d0321e0SJeremy L Thompson // Restore input arrays 575ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorRestoreInputs_Cuda(num_input_fields, qf_input_fields, op_input_fields, true, e_data, impl)); 5760d0321e0SJeremy L Thompson 5770d0321e0SJeremy L Thompson // Restore output 578ca735530SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(*assembled, &assembled_array)); 5790d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 5800d0321e0SJeremy L Thompson } 5810d0321e0SJeremy L Thompson 5820d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 5830d0321e0SJeremy L Thompson // Assemble Linear QFunction 5840d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 5852b730f8bSJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Cuda(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 5862b730f8bSJeremy L Thompson return CeedOperatorLinearAssembleQFunctionCore_Cuda(op, true, assembled, rstr, request); 5870d0321e0SJeremy L Thompson } 5880d0321e0SJeremy L Thompson 5890d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 5900d0321e0SJeremy L Thompson // Update Assembled Linear QFunction 5910d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 5922b730f8bSJeremy L Thompson static int CeedOperatorLinearAssembleQFunctionUpdate_Cuda(CeedOperator op, CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 5932b730f8bSJeremy L Thompson return CeedOperatorLinearAssembleQFunctionCore_Cuda(op, false, &assembled, &rstr, request); 5940d0321e0SJeremy L Thompson } 5950d0321e0SJeremy L Thompson 5960d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 5970d0321e0SJeremy L Thompson // Assemble diagonal setup 5980d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 599506b1a0cSSebastian Grimberg static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op, CeedInt use_ceedsize_idx) { 6000d0321e0SJeremy L Thompson Ceed ceed; 601ca735530SJeremy L Thompson char *diagonal_kernel_path, *diagonal_kernel_source; 602ca735530SJeremy L Thompson CeedInt num_input_fields, num_output_fields, num_e_mode_in = 0, num_comp = 0, dim = 1, num_e_mode_out = 0, num_nodes, num_qpts; 603ca735530SJeremy L Thompson CeedEvalMode *e_mode_in = NULL, *e_mode_out = NULL; 604ca735530SJeremy L Thompson CeedElemRestriction rstr_in = NULL, rstr_out = NULL; 605ca735530SJeremy L Thompson CeedBasis basis_in = NULL, basis_out = NULL; 606ca735530SJeremy L Thompson CeedQFunctionField *qf_fields; 6070d0321e0SJeremy L Thompson CeedQFunction qf; 608ca735530SJeremy L Thompson CeedOperatorField *op_fields; 609ca735530SJeremy L Thompson CeedOperator_Cuda *impl; 610ca735530SJeremy L Thompson 611ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 6122b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 613ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 6140d0321e0SJeremy L Thompson 6150d0321e0SJeremy L Thompson // Determine active input basis 616ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 617ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 618ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_input_fields; i++) { 6190d0321e0SJeremy L Thompson CeedVector vec; 620ca735530SJeremy L Thompson 621ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); 6220d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 623ca735530SJeremy L Thompson CeedEvalMode e_mode; 6240d0321e0SJeremy L Thompson CeedElemRestriction rstr; 625ca735530SJeremy L Thompson 626ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 627ca735530SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis_in, &num_comp)); 628ca735530SJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis_in, &dim)); 629ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 630ca735530SJeremy L Thompson CeedCheck(!rstr_in || rstr_in == rstr, ceed, CEED_ERROR_BACKEND, 6316574a04fSJeremy L Thompson "Backend does not implement multi-field non-composite operator diagonal assembly"); 632ca735530SJeremy L Thompson rstr_in = rstr; 633ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &e_mode)); 634ca735530SJeremy L Thompson switch (e_mode) { 6350d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 6360d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 637ca735530SJeremy L Thompson CeedCallBackend(CeedRealloc(num_e_mode_in + 1, &e_mode_in)); 638ca735530SJeremy L Thompson e_mode_in[num_e_mode_in] = e_mode; 639ca735530SJeremy L Thompson num_e_mode_in += 1; 6400d0321e0SJeremy L Thompson break; 6410d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 642ca735530SJeremy L Thompson CeedCallBackend(CeedRealloc(num_e_mode_in + dim, &e_mode_in)); 643ca735530SJeremy L Thompson for (CeedInt d = 0; d < dim; d++) e_mode_in[num_e_mode_in + d] = e_mode; 644ca735530SJeremy L Thompson num_e_mode_in += dim; 6450d0321e0SJeremy L Thompson break; 6460d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: 6470d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 6480d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 6490d0321e0SJeremy L Thompson break; // Caught by QF Assembly 6500d0321e0SJeremy L Thompson } 6510d0321e0SJeremy L Thompson } 6520d0321e0SJeremy L Thompson } 6530d0321e0SJeremy L Thompson 6540d0321e0SJeremy L Thompson // Determine active output basis 655ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 656ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields)); 657ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_output_fields; i++) { 6580d0321e0SJeremy L Thompson CeedVector vec; 659ca735530SJeremy L Thompson 660ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); 6610d0321e0SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 662ca735530SJeremy L Thompson CeedEvalMode e_mode; 6630d0321e0SJeremy L Thompson CeedElemRestriction rstr; 664ca735530SJeremy L Thompson 665ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 666ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 667ca735530SJeremy L Thompson CeedCheck(!rstr_out || rstr_out == rstr, ceed, CEED_ERROR_BACKEND, 6686574a04fSJeremy L Thompson "Backend does not implement multi-field non-composite operator diagonal assembly"); 669ca735530SJeremy L Thompson rstr_out = rstr; 670ca735530SJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &e_mode)); 671ca735530SJeremy L Thompson switch (e_mode) { 6720d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 6730d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: 674ca735530SJeremy L Thompson CeedCallBackend(CeedRealloc(num_e_mode_out + 1, &e_mode_out)); 675ca735530SJeremy L Thompson e_mode_out[num_e_mode_out] = e_mode; 676ca735530SJeremy L Thompson num_e_mode_out += 1; 6770d0321e0SJeremy L Thompson break; 6780d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: 679ca735530SJeremy L Thompson CeedCallBackend(CeedRealloc(num_e_mode_out + dim, &e_mode_out)); 680ca735530SJeremy L Thompson for (CeedInt d = 0; d < dim; d++) e_mode_out[num_e_mode_out + d] = e_mode; 681ca735530SJeremy L Thompson num_e_mode_out += dim; 6820d0321e0SJeremy L Thompson break; 6830d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: 6840d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 6850d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 6860d0321e0SJeremy L Thompson break; // Caught by QF Assembly 6870d0321e0SJeremy L Thompson } 6880d0321e0SJeremy L Thompson } 6890d0321e0SJeremy L Thompson } 6900d0321e0SJeremy L Thompson 6910d0321e0SJeremy L Thompson // Operator data struct 6922b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 6932b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl->diag)); 6940d0321e0SJeremy L Thompson CeedOperatorDiag_Cuda *diag = impl->diag; 695ca735530SJeremy L Thompson 696ca735530SJeremy L Thompson diag->basis_in = basis_in; 697ca735530SJeremy L Thompson diag->basis_out = basis_out; 698ca735530SJeremy L Thompson diag->h_e_mode_in = e_mode_in; 699ca735530SJeremy L Thompson diag->h_e_mode_out = e_mode_out; 700ca735530SJeremy L Thompson diag->num_e_mode_in = num_e_mode_in; 701ca735530SJeremy L Thompson diag->num_e_mode_out = num_e_mode_out; 7020d0321e0SJeremy L Thompson 7030d0321e0SJeremy L Thompson // Assemble kernel 7042b730f8bSJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h", &diagonal_kernel_path)); 70523d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Diagonal Assembly Kernel Source -----\n"); 7062b730f8bSJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, diagonal_kernel_path, &diagonal_kernel_source)); 70723d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Diagonal Assembly Source Complete! -----\n"); 708ca735530SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes(basis_in, &num_nodes)); 709ca735530SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 710ca735530SJeremy L Thompson diag->num_nodes = num_nodes; 711ca735530SJeremy L Thompson CeedCallCuda(ceed, 712ca735530SJeremy L Thompson CeedCompile_Cuda(ceed, diagonal_kernel_source, &diag->module, 6, "NUM_E_MODE_IN", num_e_mode_in, "NUM_E_MODE_OUT", num_e_mode_out, 713ca735530SJeremy L Thompson "NUM_NODES", num_nodes, "NUM_QPTS", num_qpts, "NUM_COMP", num_comp, "USE_CEEDSIZE", use_ceedsize_idx)); 714eb7e6cafSJeremy L Thompson CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, diag->module, "linearDiagonal", &diag->linearDiagonal)); 715eb7e6cafSJeremy L Thompson CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, diag->module, "linearPointBlockDiagonal", &diag->linearPointBlock)); 7162b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&diagonal_kernel_path)); 7172b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&diagonal_kernel_source)); 7180d0321e0SJeremy L Thompson 7190d0321e0SJeremy L Thompson // Basis matrices 720ca735530SJeremy L Thompson const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 721ca735530SJeremy L Thompson const CeedInt interp_bytes = q_bytes * num_nodes; 722ca735530SJeremy L Thompson const CeedInt grad_bytes = q_bytes * num_nodes * dim; 723ca735530SJeremy L Thompson const CeedInt e_mode_bytes = sizeof(CeedEvalMode); 724ca735530SJeremy L Thompson const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 7250d0321e0SJeremy L Thompson 7260d0321e0SJeremy L Thompson // CEED_EVAL_NONE 7270d0321e0SJeremy L Thompson CeedScalar *identity = NULL; 728ca735530SJeremy L Thompson bool is_eval_none = false; 729ca735530SJeremy L Thompson 730ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_e_mode_in; i++) is_eval_none = is_eval_none || (e_mode_in[i] == CEED_EVAL_NONE); 731ca735530SJeremy L Thompson for (CeedInt i = 0; i < num_e_mode_out; i++) is_eval_none = is_eval_none || (e_mode_out[i] == CEED_EVAL_NONE); 732ca735530SJeremy L Thompson if (is_eval_none) { 733ca735530SJeremy L Thompson CeedCallBackend(CeedCalloc(num_qpts * num_nodes, &identity)); 734ca735530SJeremy L Thompson for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 735ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&diag->d_identity, interp_bytes)); 736ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy(diag->d_identity, identity, interp_bytes, cudaMemcpyHostToDevice)); 7370d0321e0SJeremy L Thompson } 7380d0321e0SJeremy L Thompson 7390d0321e0SJeremy L Thompson // CEED_EVAL_INTERP 740ca735530SJeremy L Thompson CeedCallBackend(CeedBasisGetInterp(basis_in, &interp_in)); 741ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&diag->d_interp_in, interp_bytes)); 742ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy(diag->d_interp_in, interp_in, interp_bytes, cudaMemcpyHostToDevice)); 743ca735530SJeremy L Thompson CeedCallBackend(CeedBasisGetInterp(basis_out, &interp_out)); 744ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&diag->d_interp_out, interp_bytes)); 745ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy(diag->d_interp_out, interp_out, interp_bytes, cudaMemcpyHostToDevice)); 7460d0321e0SJeremy L Thompson 7470d0321e0SJeremy L Thompson // CEED_EVAL_GRAD 748ca735530SJeremy L Thompson CeedCallBackend(CeedBasisGetGrad(basis_in, &grad_in)); 749ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&diag->d_grad_in, grad_bytes)); 750ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy(diag->d_grad_in, grad_in, grad_bytes, cudaMemcpyHostToDevice)); 751ca735530SJeremy L Thompson CeedCallBackend(CeedBasisGetGrad(basis_out, &grad_out)); 752ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&diag->d_grad_out, grad_bytes)); 753ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy(diag->d_grad_out, grad_out, grad_bytes, cudaMemcpyHostToDevice)); 7540d0321e0SJeremy L Thompson 755ca735530SJeremy L Thompson // Arrays of e_modes 756ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&diag->d_e_mode_in, num_e_mode_in * e_mode_bytes)); 757ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy(diag->d_e_mode_in, e_mode_in, num_e_mode_in * e_mode_bytes, cudaMemcpyHostToDevice)); 758ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&diag->d_e_mode_out, num_e_mode_out * e_mode_bytes)); 759ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy(diag->d_e_mode_out, e_mode_out, num_e_mode_out * e_mode_bytes, cudaMemcpyHostToDevice)); 7600d0321e0SJeremy L Thompson 7610d0321e0SJeremy L Thompson // Restriction 762ca735530SJeremy L Thompson diag->diag_rstr = rstr_out; 7630d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 7640d0321e0SJeremy L Thompson } 7650d0321e0SJeremy L Thompson 7660d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 7670d0321e0SJeremy L Thompson // Assemble diagonal common code 7680d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 769ca735530SJeremy L Thompson static inline int CeedOperatorAssembleDiagonalCore_Cuda(CeedOperator op, CeedVector assembled, CeedRequest *request, const bool is_point_block) { 7700d0321e0SJeremy L Thompson Ceed ceed; 771ca735530SJeremy L Thompson CeedSize assembled_length = 0, assembled_qf_length = 0; 772ca735530SJeremy L Thompson CeedInt use_ceedsize_idx = 0, num_elem; 773ca735530SJeremy L Thompson CeedScalar *elem_diag_array; 774ca735530SJeremy L Thompson const CeedScalar *assembled_qf_array; 775ca735530SJeremy L Thompson CeedVector assembled_qf = NULL; 776ca735530SJeremy L Thompson CeedElemRestriction rstr = NULL; 7770d0321e0SJeremy L Thompson CeedOperator_Cuda *impl; 778ca735530SJeremy L Thompson 779ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 7802b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 7810d0321e0SJeremy L Thompson 7820d0321e0SJeremy L Thompson // Assemble QFunction 783ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request)); 7842b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); 7850d0321e0SJeremy L Thompson 786f7c1b517Snbeams CeedCallBackend(CeedVectorGetLength(assembled, &assembled_length)); 787ca735530SJeremy L Thompson CeedCallBackend(CeedVectorGetLength(assembled_qf, &assembled_qf_length)); 788ca735530SJeremy L Thompson if ((assembled_length > INT_MAX) || (assembled_qf_length > INT_MAX)) use_ceedsize_idx = 1; 789f7c1b517Snbeams 7900d0321e0SJeremy L Thompson // Setup 791506b1a0cSSebastian Grimberg if (!impl->diag) CeedCallBackend(CeedOperatorAssembleDiagonalSetup_Cuda(op, use_ceedsize_idx)); 7920d0321e0SJeremy L Thompson CeedOperatorDiag_Cuda *diag = impl->diag; 793ca735530SJeremy L Thompson 7940d0321e0SJeremy L Thompson assert(diag != NULL); 7950d0321e0SJeremy L Thompson 7960d0321e0SJeremy L Thompson // Restriction 797506b1a0cSSebastian Grimberg if (is_point_block && !diag->point_block_diag_rstr) { 798506b1a0cSSebastian Grimberg CeedCallBackend(CeedOperatorCreateActivePointBlockRestriction(diag->diag_rstr, &diag->point_block_diag_rstr)); 7990d0321e0SJeremy L Thompson } 800506b1a0cSSebastian Grimberg CeedElemRestriction diag_rstr = is_point_block ? diag->point_block_diag_rstr : diag->diag_rstr; 8010d0321e0SJeremy L Thompson 8020d0321e0SJeremy L Thompson // Create diagonal vector 803ca735530SJeremy L Thompson CeedVector elem_diag = is_point_block ? diag->point_block_elem_diag : diag->elem_diag; 804ca735530SJeremy L Thompson 805ca735530SJeremy L Thompson if (!elem_diag) { 806ca735530SJeremy L Thompson CeedCallBackend(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag)); 807ca735530SJeremy L Thompson if (is_point_block) diag->point_block_elem_diag = elem_diag; 808ca735530SJeremy L Thompson else diag->elem_diag = elem_diag; 8090d0321e0SJeremy L Thompson } 810ca735530SJeremy L Thompson CeedCallBackend(CeedVectorSetValue(elem_diag, 0.0)); 8110d0321e0SJeremy L Thompson 812*91db28b6SZach Atkins // Only assemble diagonal if the basis has nodes, otherwise inputs are null pointers 813*91db28b6SZach Atkins if (diag->num_nodes > 0) { 8140d0321e0SJeremy L Thompson // Assemble element operator diagonals 815ca735530SJeremy L Thompson CeedCallBackend(CeedVectorGetArray(elem_diag, CEED_MEM_DEVICE, &elem_diag_array)); 816ca735530SJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_DEVICE, &assembled_qf_array)); 817ca735530SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem)); 8180d0321e0SJeremy L Thompson 8190d0321e0SJeremy L Thompson // Compute the diagonal of B^T D B 820ca735530SJeremy L Thompson int elem_per_block = 1; 821ca735530SJeremy L Thompson int grid = num_elem / elem_per_block + ((num_elem / elem_per_block * elem_per_block < num_elem) ? 1 : 0); 822ca735530SJeremy L Thompson void *args[] = {(void *)&num_elem, &diag->d_identity, &diag->d_interp_in, &diag->d_grad_in, &diag->d_interp_out, 823ca735530SJeremy L Thompson &diag->d_grad_out, &diag->d_e_mode_in, &diag->d_e_mode_out, &assembled_qf_array, &elem_diag_array}; 824ca735530SJeremy L Thompson if (is_point_block) { 825ca735530SJeremy L Thompson CeedCallBackend(CeedRunKernelDim_Cuda(ceed, diag->linearPointBlock, grid, diag->num_nodes, 1, elem_per_block, args)); 8260d0321e0SJeremy L Thompson } else { 827ca735530SJeremy L Thompson CeedCallBackend(CeedRunKernelDim_Cuda(ceed, diag->linearDiagonal, grid, diag->num_nodes, 1, elem_per_block, args)); 8280d0321e0SJeremy L Thompson } 8290d0321e0SJeremy L Thompson 8300d0321e0SJeremy L Thompson // Restore arrays 831ca735530SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 832ca735530SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 833*91db28b6SZach Atkins } 8340d0321e0SJeremy L Thompson 8350d0321e0SJeremy L Thompson // Assemble local operator diagonal 836ca735530SJeremy L Thompson CeedCallBackend(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 8370d0321e0SJeremy L Thompson 8380d0321e0SJeremy L Thompson // Cleanup 839ca735530SJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&assembled_qf)); 8400d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 8410d0321e0SJeremy L Thompson } 8420d0321e0SJeremy L Thompson 8430d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8440d0321e0SJeremy L Thompson // Assemble Linear Diagonal 8450d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8462b730f8bSJeremy L Thompson static int CeedOperatorLinearAssembleAddDiagonal_Cuda(CeedOperator op, CeedVector assembled, CeedRequest *request) { 8472b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorAssembleDiagonalCore_Cuda(op, assembled, request, false)); 8486aa95790SJeremy L Thompson return CEED_ERROR_SUCCESS; 8490d0321e0SJeremy L Thompson } 8500d0321e0SJeremy L Thompson 8510d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8520d0321e0SJeremy L Thompson // Assemble Linear Point Block Diagonal 8530d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8542b730f8bSJeremy L Thompson static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Cuda(CeedOperator op, CeedVector assembled, CeedRequest *request) { 8552b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorAssembleDiagonalCore_Cuda(op, assembled, request, true)); 8566aa95790SJeremy L Thompson return CEED_ERROR_SUCCESS; 8570d0321e0SJeremy L Thompson } 8580d0321e0SJeremy L Thompson 8590d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 860cc132f9aSnbeams // Single operator assembly setup 861cc132f9aSnbeams //------------------------------------------------------------------------------ 862f7c1b517Snbeams static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_ceedsize_idx) { 863cc132f9aSnbeams Ceed ceed; 864ca735530SJeremy L Thompson char *assembly_kernel_path, *assembly_kernel_source; 865ca735530SJeremy L Thompson CeedInt num_input_fields, num_output_fields, num_e_mode_in = 0, dim = 1, num_B_in_mats_to_load = 0, size_B_in = 0, num_qpts = 0, elem_size = 0, 866ca735530SJeremy L Thompson num_e_mode_out = 0, num_B_out_mats_to_load = 0, size_B_out = 0, num_elem, num_comp; 867ca735530SJeremy L Thompson CeedEvalMode *eval_mode_in = NULL, *eval_mode_out = NULL; 868ca735530SJeremy L Thompson CeedElemRestriction rstr_in = NULL, rstr_out = NULL; 869ca735530SJeremy L Thompson CeedBasis basis_in = NULL, basis_out = NULL; 870ca735530SJeremy L Thompson CeedQFunctionField *qf_fields; 871ca735530SJeremy L Thompson CeedQFunction qf; 872ca735530SJeremy L Thompson CeedOperatorField *input_fields, *output_fields; 873cc132f9aSnbeams CeedOperator_Cuda *impl; 874ca735530SJeremy L Thompson 875ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 8762b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 877cc132f9aSnbeams 878cc132f9aSnbeams // Get intput and output fields 8792b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 880cc132f9aSnbeams 881cc132f9aSnbeams // Determine active input basis eval mode 8822b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 8832b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 884cc132f9aSnbeams // Note that the kernel will treat each dimension of a gradient action separately; 885ca735530SJeremy L Thompson // i.e., when an active input has a CEED_EVAL_GRAD mode, num_e_mode_in will increment by dim. 886ea61e9acSJeremy L Thompson // However, for the purposes of loading the B matrices, it will be treated as one mode, and we will load/copy the entire gradient matrix at once, so 887cc132f9aSnbeams // num_B_in_mats_to_load will be incremented by 1. 888cc132f9aSnbeams for (CeedInt i = 0; i < num_input_fields; i++) { 889cc132f9aSnbeams CeedVector vec; 890ca735530SJeremy L Thompson 8912b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(input_fields[i], &vec)); 892cc132f9aSnbeams if (vec == CEED_VECTOR_ACTIVE) { 893ca735530SJeremy L Thompson CeedEvalMode eval_mode; 894ca735530SJeremy L Thompson 8952b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(input_fields[i], &basis_in)); 8962b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis_in, &dim)); 897ca735530SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 8982b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in)); 899ca735530SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 9002b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 901cc132f9aSnbeams if (eval_mode != CEED_EVAL_NONE) { 9022b730f8bSJeremy L Thompson CeedCallBackend(CeedRealloc(num_B_in_mats_to_load + 1, &eval_mode_in)); 903cc132f9aSnbeams eval_mode_in[num_B_in_mats_to_load] = eval_mode; 904cc132f9aSnbeams num_B_in_mats_to_load += 1; 905cc132f9aSnbeams if (eval_mode == CEED_EVAL_GRAD) { 906ca735530SJeremy L Thompson num_e_mode_in += dim; 907ca735530SJeremy L Thompson size_B_in += dim * elem_size * num_qpts; 908cc132f9aSnbeams } else { 909ca735530SJeremy L Thompson num_e_mode_in += 1; 910ca735530SJeremy L Thompson size_B_in += elem_size * num_qpts; 911cc132f9aSnbeams } 912cc132f9aSnbeams } 913cc132f9aSnbeams } 914cc132f9aSnbeams } 915cc132f9aSnbeams 916cc132f9aSnbeams // Determine active output basis; basis_out and rstr_out only used if same as input, TODO 9172b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields)); 918cc132f9aSnbeams for (CeedInt i = 0; i < num_output_fields; i++) { 919cc132f9aSnbeams CeedVector vec; 920ca735530SJeremy L Thompson 9212b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetVector(output_fields[i], &vec)); 922cc132f9aSnbeams if (vec == CEED_VECTOR_ACTIVE) { 923ca735530SJeremy L Thompson CeedEvalMode eval_mode; 924ca735530SJeremy L Thompson 9252b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetBasis(output_fields[i], &basis_out)); 9262b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out)); 9276574a04fSJeremy L Thompson CeedCheck(!rstr_out || rstr_out == rstr_in, ceed, CEED_ERROR_BACKEND, "Backend does not implement multi-field non-composite operator assembly"); 9282b730f8bSJeremy L Thompson CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 929cc132f9aSnbeams if (eval_mode != CEED_EVAL_NONE) { 9302b730f8bSJeremy L Thompson CeedCallBackend(CeedRealloc(num_B_out_mats_to_load + 1, &eval_mode_out)); 931cc132f9aSnbeams eval_mode_out[num_B_out_mats_to_load] = eval_mode; 932cc132f9aSnbeams num_B_out_mats_to_load += 1; 933cc132f9aSnbeams if (eval_mode == CEED_EVAL_GRAD) { 934ca735530SJeremy L Thompson num_e_mode_out += dim; 935ca735530SJeremy L Thompson size_B_out += dim * elem_size * num_qpts; 936cc132f9aSnbeams } else { 937ca735530SJeremy L Thompson num_e_mode_out += 1; 938ca735530SJeremy L Thompson size_B_out += elem_size * num_qpts; 939cc132f9aSnbeams } 940cc132f9aSnbeams } 941cc132f9aSnbeams } 942cc132f9aSnbeams } 943ca735530SJeremy L Thompson CeedCheck(num_e_mode_in > 0 && num_e_mode_out > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); 944cc132f9aSnbeams 945ca735530SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 946ca735530SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 947cc132f9aSnbeams 9482b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl->asmb)); 949cc132f9aSnbeams CeedOperatorAssemble_Cuda *asmb = impl->asmb; 950ca735530SJeremy L Thompson asmb->num_elem = num_elem; 951cc132f9aSnbeams 952cc132f9aSnbeams // Compile kernels 953ca735530SJeremy L Thompson int elem_per_block = 1; 954ca735530SJeremy L Thompson asmb->elem_per_block = elem_per_block; 955ca735530SJeremy L Thompson CeedInt block_size = elem_size * elem_size * elem_per_block; 956cc132f9aSnbeams Ceed_Cuda *cuda_data; 957ca735530SJeremy L Thompson 9582b730f8bSJeremy L Thompson CeedCallBackend(CeedGetData(ceed, &cuda_data)); 9592b730f8bSJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-operator-assemble.h", &assembly_kernel_path)); 96023d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Assembly Kernel Source -----\n"); 9612b730f8bSJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, assembly_kernel_path, &assembly_kernel_source)); 96223d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Assembly Source Complete! -----\n"); 96307b31e0eSJeremy L Thompson bool fallback = block_size > cuda_data->device_prop.maxThreadsPerBlock; 964ca735530SJeremy L Thompson 96507b31e0eSJeremy L Thompson if (fallback) { 96659ad764aSnbeams // Use fallback kernel with 1D threadblock 967ca735530SJeremy L Thompson block_size = elem_size * elem_per_block; 968ca735530SJeremy L Thompson asmb->block_size_x = elem_size; 96959ad764aSnbeams asmb->block_size_y = 1; 97059ad764aSnbeams } else { // Use kernel with 2D threadblock 971ca735530SJeremy L Thompson asmb->block_size_x = elem_size; 972ca735530SJeremy L Thompson asmb->block_size_y = elem_size; 97307b31e0eSJeremy L Thompson } 974ca735530SJeremy L Thompson CeedCallBackend(CeedCompile_Cuda(ceed, assembly_kernel_source, &asmb->module, 8, "NUM_ELEM", num_elem, "NUM_E_MODE_IN", num_e_mode_in, 975ca735530SJeremy L Thompson "NUM_E_MODE_OUT", num_e_mode_out, "NUM_QPTS", num_qpts, "NUM_NODES", elem_size, "BLOCK_SIZE", block_size, 976ca735530SJeremy L Thompson "NUM_COMP", num_comp, "USE_CEEDSIZE", use_ceedsize_idx)); 977b2165e7aSSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, asmb->module, fallback ? "linearAssembleFallback" : "linearAssemble", &asmb->linearAssemble)); 9782b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&assembly_kernel_path)); 9792b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&assembly_kernel_source)); 980cc132f9aSnbeams 981cc132f9aSnbeams // Build 'full' B matrices (not 1D arrays used for tensor-product matrices) 982cc132f9aSnbeams const CeedScalar *interp_in, *grad_in; 983ca735530SJeremy L Thompson 9842b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetInterp(basis_in, &interp_in)); 9852b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetGrad(basis_in, &grad_in)); 986cc132f9aSnbeams 987cc132f9aSnbeams // Load into B_in, in order that they will be used in eval_mode 988cc132f9aSnbeams const CeedInt inBytes = size_B_in * sizeof(CeedScalar); 989cc132f9aSnbeams CeedInt mat_start = 0; 990ca735530SJeremy L Thompson 9912b730f8bSJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&asmb->d_B_in, inBytes)); 992cc132f9aSnbeams for (int i = 0; i < num_B_in_mats_to_load; i++) { 993cc132f9aSnbeams CeedEvalMode eval_mode = eval_mode_in[i]; 994ca735530SJeremy L Thompson 995cc132f9aSnbeams if (eval_mode == CEED_EVAL_INTERP) { 996ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy(&asmb->d_B_in[mat_start], interp_in, elem_size * num_qpts * sizeof(CeedScalar), cudaMemcpyHostToDevice)); 997ca735530SJeremy L Thompson mat_start += elem_size * num_qpts; 998cc132f9aSnbeams } else if (eval_mode == CEED_EVAL_GRAD) { 999ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy(&asmb->d_B_in[mat_start], grad_in, dim * elem_size * num_qpts * sizeof(CeedScalar), cudaMemcpyHostToDevice)); 1000ca735530SJeremy L Thompson mat_start += dim * elem_size * num_qpts; 1001cc132f9aSnbeams } 1002cc132f9aSnbeams } 1003cc132f9aSnbeams 1004cc132f9aSnbeams const CeedScalar *interp_out, *grad_out; 1005ca735530SJeremy L Thompson 1006ca735530SJeremy L Thompson // Note that this function currently assumes 1 basis, so this should always be true for now 1007cc132f9aSnbeams if (basis_out == basis_in) { 1008cc132f9aSnbeams interp_out = interp_in; 1009cc132f9aSnbeams grad_out = grad_in; 1010cc132f9aSnbeams } else { 10112b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetInterp(basis_out, &interp_out)); 10122b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetGrad(basis_out, &grad_out)); 1013cc132f9aSnbeams } 1014cc132f9aSnbeams 1015cc132f9aSnbeams // Load into B_out, in order that they will be used in eval_mode 1016cc132f9aSnbeams const CeedInt outBytes = size_B_out * sizeof(CeedScalar); 1017cc132f9aSnbeams mat_start = 0; 1018ca735530SJeremy L Thompson 10192b730f8bSJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&asmb->d_B_out, outBytes)); 1020cc132f9aSnbeams for (int i = 0; i < num_B_out_mats_to_load; i++) { 1021cc132f9aSnbeams CeedEvalMode eval_mode = eval_mode_out[i]; 1022ca735530SJeremy L Thompson 1023cc132f9aSnbeams if (eval_mode == CEED_EVAL_INTERP) { 1024ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy(&asmb->d_B_out[mat_start], interp_out, elem_size * num_qpts * sizeof(CeedScalar), cudaMemcpyHostToDevice)); 1025ca735530SJeremy L Thompson mat_start += elem_size * num_qpts; 1026cc132f9aSnbeams } else if (eval_mode == CEED_EVAL_GRAD) { 1027ca735530SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy(&asmb->d_B_out[mat_start], grad_out, dim * elem_size * num_qpts * sizeof(CeedScalar), cudaMemcpyHostToDevice)); 1028ca735530SJeremy L Thompson mat_start += dim * elem_size * num_qpts; 1029cc132f9aSnbeams } 1030cc132f9aSnbeams } 1031cc132f9aSnbeams return CEED_ERROR_SUCCESS; 1032cc132f9aSnbeams } 1033cc132f9aSnbeams 1034cc132f9aSnbeams //------------------------------------------------------------------------------ 1035cefa2673SJeremy L Thompson // Assemble matrix data for COO matrix of assembled operator. 1036cefa2673SJeremy L Thompson // The sparsity pattern is set by CeedOperatorLinearAssembleSymbolic. 1037cefa2673SJeremy L Thompson // 1038ea61e9acSJeremy L Thompson // Note that this (and other assembly routines) currently assume only one active input restriction/basis per operator (could have multiple basis eval 1039cefa2673SJeremy L Thompson // modes). 1040cefa2673SJeremy L Thompson // TODO: allow multiple active input restrictions/basis objects 1041cc132f9aSnbeams //------------------------------------------------------------------------------ 10422b730f8bSJeremy L Thompson static int CeedSingleOperatorAssemble_Cuda(CeedOperator op, CeedInt offset, CeedVector values) { 1043cc132f9aSnbeams Ceed ceed; 1044ca735530SJeremy L Thompson CeedSize values_length = 0, assembled_qf_length = 0; 1045ca735530SJeremy L Thompson CeedInt use_ceedsize_idx = 0; 1046ca735530SJeremy L Thompson CeedScalar *values_array; 1047ca735530SJeremy L Thompson const CeedScalar *qf_array; 1048ca735530SJeremy L Thompson CeedVector assembled_qf = NULL; 1049ca735530SJeremy L Thompson CeedElemRestriction rstr_q = NULL; 1050cc132f9aSnbeams CeedOperator_Cuda *impl; 1051ca735530SJeremy L Thompson 1052ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 10532b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorGetData(op, &impl)); 1054cc132f9aSnbeams 1055cc132f9aSnbeams // Assemble QFunction 10562b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 10572b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionDestroy(&rstr_q)); 105828ec399dSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(values, CEED_MEM_DEVICE, &values_array)); 1059cc132f9aSnbeams values_array += offset; 10602b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_DEVICE, &qf_array)); 1061cc132f9aSnbeams 1062f7c1b517Snbeams CeedCallBackend(CeedVectorGetLength(values, &values_length)); 1063f7c1b517Snbeams CeedCallBackend(CeedVectorGetLength(assembled_qf, &assembled_qf_length)); 1064f7c1b517Snbeams if ((values_length > INT_MAX) || (assembled_qf_length > INT_MAX)) use_ceedsize_idx = 1; 1065f7c1b517Snbeams // Setup 1066f7c1b517Snbeams if (!impl->asmb) { 1067f7c1b517Snbeams CeedCallBackend(CeedSingleOperatorAssembleSetup_Cuda(op, use_ceedsize_idx)); 1068f7c1b517Snbeams assert(impl->asmb != NULL); 1069f7c1b517Snbeams } 1070f7c1b517Snbeams 1071cc132f9aSnbeams // Compute B^T D B 1072ca735530SJeremy L Thompson const CeedInt num_elem = impl->asmb->num_elem; 1073ca735530SJeremy L Thompson const CeedInt elem_per_block = impl->asmb->elem_per_block; 1074ca735530SJeremy L Thompson const CeedInt grid = num_elem / elem_per_block + ((num_elem / elem_per_block * elem_per_block < num_elem) ? 1 : 0); 10752b730f8bSJeremy L Thompson void *args[] = {&impl->asmb->d_B_in, &impl->asmb->d_B_out, &qf_array, &values_array}; 1076ca735530SJeremy L Thompson 10772b730f8bSJeremy L Thompson CeedCallBackend( 1078ca735530SJeremy L Thompson CeedRunKernelDim_Cuda(ceed, impl->asmb->linearAssemble, grid, impl->asmb->block_size_x, impl->asmb->block_size_y, elem_per_block, args)); 1079cc132f9aSnbeams 1080cc132f9aSnbeams // Restore arrays 10812b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(values, &values_array)); 10822b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(assembled_qf, &qf_array)); 1083cc132f9aSnbeams 1084cc132f9aSnbeams // Cleanup 10852b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorDestroy(&assembled_qf)); 1086cc132f9aSnbeams return CEED_ERROR_SUCCESS; 1087cc132f9aSnbeams } 1088cc132f9aSnbeams 1089cc132f9aSnbeams //------------------------------------------------------------------------------ 10900d0321e0SJeremy L Thompson // Create operator 10910d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 10920d0321e0SJeremy L Thompson int CeedOperatorCreate_Cuda(CeedOperator op) { 10930d0321e0SJeremy L Thompson Ceed ceed; 10940d0321e0SJeremy L Thompson CeedOperator_Cuda *impl; 10950d0321e0SJeremy L Thompson 1096ca735530SJeremy L Thompson CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 10972b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 10982b730f8bSJeremy L Thompson CeedCallBackend(CeedOperatorSetData(op, impl)); 10990d0321e0SJeremy L Thompson 11002b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", CeedOperatorLinearAssembleQFunction_Cuda)); 11012b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunctionUpdate", CeedOperatorLinearAssembleQFunctionUpdate_Cuda)); 11022b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal", CeedOperatorLinearAssembleAddDiagonal_Cuda)); 11032b730f8bSJeremy L Thompson CeedCallBackend( 11042b730f8bSJeremy L Thompson CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddPointBlockDiagonal", CeedOperatorLinearAssembleAddPointBlockDiagonal_Cuda)); 11052b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleSingle", CeedSingleOperatorAssemble_Cuda)); 11062b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", CeedOperatorApplyAdd_Cuda)); 11072b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "Destroy", CeedOperatorDestroy_Cuda)); 11080d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 11090d0321e0SJeremy L Thompson } 11100d0321e0SJeremy L Thompson 11110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1112