15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 37f5b9731SStan Tomov // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 57f5b9731SStan Tomov // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 77f5b9731SStan Tomov 849aac155SJeremy L Thompson #include <ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 10f6af633fSnbeams #include <ceed/jit-tools.h> 11f6af633fSnbeams #include <string.h> 122b730f8bSJeremy L Thompson 13e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP 14f6af633fSnbeams #include "../hip/ceed-hip-common.h" 15f6af633fSnbeams #include "../hip/ceed-hip-compile.h" 16f6af633fSnbeams #else 17f6af633fSnbeams #include "../cuda/ceed-cuda-common.h" 18f6af633fSnbeams #include "../cuda/ceed-cuda-compile.h" 19f6af633fSnbeams #endif 2000fb7a04SSebastian Grimberg #include "ceed-magma-common.h" 2100fb7a04SSebastian Grimberg #include "ceed-magma.h" 227f5b9731SStan Tomov 23940a72f1SSebastian Grimberg #include "ceed-magma-gemm-nontensor.h" 24940a72f1SSebastian Grimberg #include "ceed-magma-gemm-selector.h" 25940a72f1SSebastian Grimberg 26940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 27940a72f1SSebastian Grimberg // Basis apply - tensor 28940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 29db2becc9SJeremy L Thompson static int CeedBasisApplyCore_Magma(CeedBasis basis, bool apply_add, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u, 30db2becc9SJeremy L Thompson CeedVector v) { 317f5b9731SStan Tomov Ceed ceed; 32e0582403Sabdelfattah83 Ceed_Magma *data; 33940a72f1SSebastian Grimberg CeedInt dim, num_comp, num_nodes, P_1d, Q_1d, P, Q; 34940a72f1SSebastian Grimberg const CeedScalar *d_u; 35940a72f1SSebastian Grimberg CeedScalar *d_v; 3638293ee6SJeremy L Thompson CeedBasis_Magma *impl; 3738293ee6SJeremy L Thompson 3838293ee6SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 39940a72f1SSebastian Grimberg CeedCallBackend(CeedGetData(ceed, &data)); 40940a72f1SSebastian Grimberg CeedCallBackend(CeedBasisGetData(basis, &impl)); 4138293ee6SJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 4238293ee6SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 43940a72f1SSebastian Grimberg CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 4438293ee6SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); 4538293ee6SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 46940a72f1SSebastian Grimberg P = P_1d; 47940a72f1SSebastian Grimberg Q = Q_1d; 4838293ee6SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 4938293ee6SJeremy L Thompson P = Q_1d; 5038293ee6SJeremy L Thompson Q = P_1d; 517f5b9731SStan Tomov } 527f5b9731SStan Tomov 53940a72f1SSebastian Grimberg // Read vectors 54940a72f1SSebastian Grimberg if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 55940a72f1SSebastian Grimberg else CeedCheck(e_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 56db2becc9SJeremy L Thompson if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 57db2becc9SJeremy L Thompson else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 58940a72f1SSebastian Grimberg 59940a72f1SSebastian Grimberg // Apply basis operation 60940a72f1SSebastian Grimberg switch (e_mode) { 61940a72f1SSebastian Grimberg case CEED_EVAL_INTERP: { 627f5b9731SStan Tomov // Define element sizes for dofs/quad 6338293ee6SJeremy L Thompson CeedInt elem_qpts_size = CeedIntPow(Q_1d, dim); 6438293ee6SJeremy L Thompson CeedInt elem_dofs_size = CeedIntPow(P_1d, dim); 657f5b9731SStan Tomov 667f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 67868539c2SNatalie Beams // component component 68868539c2SNatalie Beams // elem elem 697f5b9731SStan Tomov // node node 707f5b9731SStan Tomov 717f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 72940a72f1SSebastian Grimberg // Input (d_u) is E-vector, output (d_v) is Q-vector 737f5b9731SStan Tomov 747f5b9731SStan Tomov // Element strides 7538293ee6SJeremy L Thompson CeedInt u_elem_stride = elem_dofs_size; 7638293ee6SJeremy L Thompson CeedInt v_elem_stride = elem_qpts_size; 777f5b9731SStan Tomov // Component strides 7838293ee6SJeremy L Thompson CeedInt u_comp_stride = num_elem * elem_dofs_size; 7938293ee6SJeremy L Thompson CeedInt v_comp_stride = num_elem * elem_qpts_size; 8038293ee6SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 81940a72f1SSebastian Grimberg // Input (d_u) is Q-vector, output (d_v) is E-vector 827f5b9731SStan Tomov // Element strides 8338293ee6SJeremy L Thompson v_elem_stride = elem_dofs_size; 8438293ee6SJeremy L Thompson u_elem_stride = elem_qpts_size; 857f5b9731SStan Tomov // Component strides 8638293ee6SJeremy L Thompson v_comp_stride = num_elem * elem_dofs_size; 8738293ee6SJeremy L Thompson u_comp_stride = num_elem * elem_qpts_size; 887f5b9731SStan Tomov } 8938293ee6SJeremy L Thompson CeedInt num_threads = 1; 9038293ee6SJeremy L Thompson CeedInt num_t_col = 1; 9138293ee6SJeremy L Thompson CeedInt shared_mem = 0; 9238293ee6SJeremy L Thompson CeedInt max_P_Q = CeedIntMax(P, Q); 93f6af633fSnbeams 94f6af633fSnbeams switch (dim) { 95f6af633fSnbeams case 1: 9638293ee6SJeremy L Thompson num_threads = max_P_Q; 9738293ee6SJeremy L Thompson num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D); 9838293ee6SJeremy L Thompson shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q)); 9938293ee6SJeremy L Thompson shared_mem += sizeof(CeedScalar) * (P * Q); 100f6af633fSnbeams break; 101f6af633fSnbeams case 2: 10238293ee6SJeremy L Thompson num_threads = max_P_Q; 10338293ee6SJeremy L Thompson num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D); 10438293ee6SJeremy L Thompson shared_mem += P * Q * sizeof(CeedScalar); // for sT 105940a72f1SSebastian Grimberg // for reforming rU we need P x P, and for the intermediate output we need P x Q 106940a72f1SSebastian Grimberg shared_mem += num_t_col * (P * max_P_Q * sizeof(CeedScalar)); 107f6af633fSnbeams break; 108f6af633fSnbeams case 3: 10938293ee6SJeremy L Thompson num_threads = max_P_Q * max_P_Q; 11038293ee6SJeremy L Thompson num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D); 11138293ee6SJeremy L Thompson shared_mem += sizeof(CeedScalar) * (P * Q); // for sT 112940a72f1SSebastian Grimberg // rU needs P^2 x P, the intermediate output needs max(P^2 x Q, P x Q^2) 113940a72f1SSebastian Grimberg shared_mem += sizeof(CeedScalar) * num_t_col * (CeedIntMax(P * P * max_P_Q, P * Q * Q)); 114940a72f1SSebastian Grimberg break; 115f6af633fSnbeams } 116940a72f1SSebastian Grimberg CeedInt grid = CeedDivUpInt(num_elem, num_t_col); 117940a72f1SSebastian Grimberg void *args[] = {&impl->d_interp_1d, &d_u, &u_elem_stride, &u_comp_stride, &d_v, &v_elem_stride, &v_comp_stride, &num_elem}; 118f6af633fSnbeams 11938293ee6SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 120*e9c76bddSJeremy L Thompson CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, apply_add ? impl->InterpTransposeAdd : impl->InterpTranspose, NULL, grid, num_threads, 121*e9c76bddSJeremy L Thompson num_t_col, 1, shared_mem, args)); 122f6af633fSnbeams } else { 123*e9c76bddSJeremy L Thompson CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Interp, NULL, grid, num_threads, num_t_col, 1, shared_mem, args)); 124f6af633fSnbeams } 1252b730f8bSJeremy L Thompson } break; 1263513a710Sjeremylt case CEED_EVAL_GRAD: { 1277f5b9731SStan Tomov // Define element sizes for dofs/quad 12838293ee6SJeremy L Thompson CeedInt elem_qpts_size = CeedIntPow(Q_1d, dim); 12938293ee6SJeremy L Thompson CeedInt elem_dofs_size = CeedIntPow(P_1d, dim); 1307f5b9731SStan Tomov 131940a72f1SSebastian Grimberg // In CEED_NOTRANSPOSE mode: 132940a72f1SSebastian Grimberg // d_u is (P^dim x nc), column-major layout (nc = num_comp) 133940a72f1SSebastian Grimberg // d_v is (Q^dim x nc x dim), column-major layout (nc = num_comp) 134940a72f1SSebastian Grimberg // In CEED_TRANSPOSE mode, the sizes of d_u and d_v are switched. 135940a72f1SSebastian Grimberg 1367f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 1377f5b9731SStan Tomov // dim 138868539c2SNatalie Beams // component component 139868539c2SNatalie Beams // elem elem 1407f5b9731SStan Tomov // node node 1417f5b9731SStan Tomov 1427f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 143940a72f1SSebastian Grimberg // Input (d_u) is E-vector, output (d_v) is Q-vector 1447f5b9731SStan Tomov 1457f5b9731SStan Tomov // Element strides 14638293ee6SJeremy L Thompson CeedInt u_elem_stride = elem_dofs_size; 14738293ee6SJeremy L Thompson CeedInt v_elem_stride = elem_qpts_size; 1487f5b9731SStan Tomov // Component strides 14938293ee6SJeremy L Thompson CeedInt u_comp_stride = num_elem * elem_dofs_size; 15038293ee6SJeremy L Thompson CeedInt v_comp_stride = num_elem * elem_qpts_size; 1517f5b9731SStan Tomov // Dimension strides 15238293ee6SJeremy L Thompson CeedInt u_dim_stride = 0; 15338293ee6SJeremy L Thompson CeedInt v_dim_stride = num_elem * elem_qpts_size * num_comp; 15438293ee6SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 155940a72f1SSebastian Grimberg // Input (d_u) is Q-vector, output (d_v) is E-vector 1567f5b9731SStan Tomov // Element strides 15738293ee6SJeremy L Thompson v_elem_stride = elem_dofs_size; 15838293ee6SJeremy L Thompson u_elem_stride = elem_qpts_size; 1597f5b9731SStan Tomov // Component strides 16038293ee6SJeremy L Thompson v_comp_stride = num_elem * elem_dofs_size; 16138293ee6SJeremy L Thompson u_comp_stride = num_elem * elem_qpts_size; 1627f5b9731SStan Tomov // Dimension strides 16338293ee6SJeremy L Thompson v_dim_stride = 0; 16438293ee6SJeremy L Thompson u_dim_stride = num_elem * elem_qpts_size * num_comp; 1657f5b9731SStan Tomov } 16638293ee6SJeremy L Thompson CeedInt num_threads = 1; 16738293ee6SJeremy L Thompson CeedInt num_t_col = 1; 16838293ee6SJeremy L Thompson CeedInt shared_mem = 0; 16938293ee6SJeremy L Thompson CeedInt max_P_Q = CeedIntMax(P, Q); 170f6af633fSnbeams 171f6af633fSnbeams switch (dim) { 172f6af633fSnbeams case 1: 17338293ee6SJeremy L Thompson num_threads = max_P_Q; 17438293ee6SJeremy L Thompson num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D); 17538293ee6SJeremy L Thompson shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q)); 17638293ee6SJeremy L Thompson shared_mem += sizeof(CeedScalar) * (P * Q); 177f6af633fSnbeams break; 178f6af633fSnbeams case 2: 17938293ee6SJeremy L Thompson num_threads = max_P_Q; 18038293ee6SJeremy L Thompson num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D); 18138293ee6SJeremy L Thompson shared_mem += sizeof(CeedScalar) * 2 * P * Q; // for sTinterp and sTgrad 182940a72f1SSebastian Grimberg // for reforming rU we need P x P, and for the intermediate output we need P x Q 183940a72f1SSebastian Grimberg shared_mem += sizeof(CeedScalar) * num_t_col * (P * max_P_Q); 184f6af633fSnbeams break; 185f6af633fSnbeams case 3: 18638293ee6SJeremy L Thompson num_threads = max_P_Q * max_P_Q; 18738293ee6SJeremy L Thompson num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D); 18838293ee6SJeremy L Thompson shared_mem += sizeof(CeedScalar) * 2 * P * Q; // for sTinterp and sTgrad 189940a72f1SSebastian Grimberg // rU needs P^2 x P, the intermediate outputs need (P^2 x Q + P x Q^2) 190940a72f1SSebastian Grimberg shared_mem += sizeof(CeedScalar) * num_t_col * CeedIntMax(P * P * P, (P * P * Q) + (P * Q * Q)); 191940a72f1SSebastian Grimberg break; 192f6af633fSnbeams } 193940a72f1SSebastian Grimberg CeedInt grid = CeedDivUpInt(num_elem, num_t_col); 194940a72f1SSebastian Grimberg void *args[] = {&impl->d_interp_1d, &impl->d_grad_1d, &d_u, &u_elem_stride, &u_comp_stride, &u_dim_stride, &d_v, 19538293ee6SJeremy L Thompson &v_elem_stride, &v_comp_stride, &v_dim_stride, &num_elem}; 196f6af633fSnbeams 19738293ee6SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 198*e9c76bddSJeremy L Thompson CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, apply_add ? impl->GradTransposeAdd : impl->GradTranspose, NULL, grid, num_threads, 199*e9c76bddSJeremy L Thompson num_t_col, 1, shared_mem, args)); 200f6af633fSnbeams } else { 201*e9c76bddSJeremy L Thompson CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Grad, NULL, grid, num_threads, num_t_col, 1, shared_mem, args)); 202f6af633fSnbeams } 2032b730f8bSJeremy L Thompson } break; 2043513a710Sjeremylt case CEED_EVAL_WEIGHT: { 205940a72f1SSebastian Grimberg CeedCheck(t_mode != CEED_TRANSPOSE, ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 206097cc795SJames Wright CeedCheck(impl->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weight_1d not set", CeedEvalModes[e_mode]); 20738293ee6SJeremy L Thompson CeedInt elem_dofs_size = CeedIntPow(Q, dim); 20838293ee6SJeremy L Thompson CeedInt num_threads = 1; 20938293ee6SJeremy L Thompson CeedInt num_t_col = 1; 21038293ee6SJeremy L Thompson CeedInt shared_mem = 0; 211f6af633fSnbeams 212f6af633fSnbeams switch (dim) { 213f6af633fSnbeams case 1: 21438293ee6SJeremy L Thompson num_threads = Q; 21538293ee6SJeremy L Thompson num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D); 21638293ee6SJeremy L Thompson shared_mem += sizeof(CeedScalar) * Q; // for d_q_weight_1d 21738293ee6SJeremy L Thompson shared_mem += sizeof(CeedScalar) * num_t_col * Q; // for output 218f6af633fSnbeams break; 219f6af633fSnbeams case 2: 22038293ee6SJeremy L Thompson num_threads = Q; 22138293ee6SJeremy L Thompson num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D); 22238293ee6SJeremy L Thompson shared_mem += sizeof(CeedScalar) * Q; // for d_q_weight_1d 223f6af633fSnbeams break; 224f6af633fSnbeams case 3: 22538293ee6SJeremy L Thompson num_threads = Q * Q; 22638293ee6SJeremy L Thompson num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D); 22738293ee6SJeremy L Thompson shared_mem += sizeof(CeedScalar) * Q; // for d_q_weight_1d 228940a72f1SSebastian Grimberg break; 229f6af633fSnbeams } 230940a72f1SSebastian Grimberg CeedInt grid = CeedDivUpInt(num_elem, num_t_col); 231940a72f1SSebastian Grimberg void *args[] = {&impl->d_q_weight_1d, &d_v, &elem_dofs_size, &num_elem}; 232f6af633fSnbeams 233*e9c76bddSJeremy L Thompson CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Weight, NULL, grid, num_threads, num_t_col, 1, shared_mem, args)); 2342b730f8bSJeremy L Thompson } break; 2353513a710Sjeremylt // LCOV_EXCL_START 2363513a710Sjeremylt case CEED_EVAL_DIV: 2373513a710Sjeremylt case CEED_EVAL_CURL: 238bcbe1c99SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[e_mode]); 2393513a710Sjeremylt case CEED_EVAL_NONE: 2402b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_NONE does not make sense in this context"); 2413513a710Sjeremylt // LCOV_EXCL_STOP 2423513a710Sjeremylt } 2437f5b9731SStan Tomov 244940a72f1SSebastian Grimberg // Must sync to ensure completeness 245e0582403Sabdelfattah83 ceed_magma_queue_sync(data->queue); 246e0582403Sabdelfattah83 247940a72f1SSebastian Grimberg // Restore vectors 24838293ee6SJeremy L Thompson if (e_mode != CEED_EVAL_WEIGHT) { 249940a72f1SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 2507f5b9731SStan Tomov } 251940a72f1SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 2529bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 253e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2547f5b9731SStan Tomov } 2557f5b9731SStan Tomov 256db2becc9SJeremy L Thompson static int CeedBasisApply_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u, CeedVector v) { 257db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyCore_Magma(basis, false, num_elem, t_mode, e_mode, u, v)); 258db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 259db2becc9SJeremy L Thompson } 260db2becc9SJeremy L Thompson 261db2becc9SJeremy L Thompson static int CeedBasisApplyAdd_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u, CeedVector v) { 262db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyCore_Magma(basis, true, num_elem, t_mode, e_mode, u, v)); 263db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 264db2becc9SJeremy L Thompson } 265db2becc9SJeremy L Thompson 266940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 26714950a8eSJeremy L Thompson // Basis apply - tensor AtPoints 26814950a8eSJeremy L Thompson //------------------------------------------------------------------------------ 26914950a8eSJeremy L Thompson int CeedBasisApplyAtPoints_Magma(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, CeedEvalMode eval_mode, 27014950a8eSJeremy L Thompson CeedVector x_ref, CeedVector u, CeedVector v) { 27114950a8eSJeremy L Thompson return CeedError(CeedBasisReturnCeed(basis), CEED_ERROR_BACKEND, "Backend does not implement CeedBasisApplyAtPoints"); 27214950a8eSJeremy L Thompson } 27314950a8eSJeremy L Thompson 27414950a8eSJeremy L Thompson //------------------------------------------------------------------------------ 275940a72f1SSebastian Grimberg // Basis apply - non-tensor 276940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 277db2becc9SJeremy L Thompson static int CeedBasisApplyNonTensorCore_Magma(CeedBasis basis, bool apply_add, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, 278db2becc9SJeremy L Thompson CeedVector u, CeedVector v) { 279868539c2SNatalie Beams Ceed ceed; 280e0582403Sabdelfattah83 Ceed_Magma *data; 2817251047cSSebastian Grimberg CeedInt num_comp, num_nodes, num_qpts, P, Q, N; 2827251047cSSebastian Grimberg const CeedScalar *d_u; 283940a72f1SSebastian Grimberg CeedScalar *d_v; 28438293ee6SJeremy L Thompson CeedBasisNonTensor_Magma *impl; 28538293ee6SJeremy L Thompson 28638293ee6SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 28738293ee6SJeremy L Thompson CeedCallBackend(CeedGetData(ceed, &data)); 288940a72f1SSebastian Grimberg CeedCallBackend(CeedBasisGetData(basis, &impl)); 28938293ee6SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 290940a72f1SSebastian Grimberg CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 29138293ee6SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 292940a72f1SSebastian Grimberg P = num_nodes; 293940a72f1SSebastian Grimberg Q = num_qpts; 294940a72f1SSebastian Grimberg N = num_elem * num_comp; 29538293ee6SJeremy L Thompson 296940a72f1SSebastian Grimberg // Read vectors 297940a72f1SSebastian Grimberg if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 29838293ee6SJeremy L Thompson else CeedCheck(e_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 299db2becc9SJeremy L Thompson if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 300db2becc9SJeremy L Thompson else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 301868539c2SNatalie Beams 3027251047cSSebastian Grimberg // Compile kernels for N as needed 3037251047cSSebastian Grimberg CeedInt iN = 0; 3047251047cSSebastian Grimberg if (P <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P && Q <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q && (e_mode != CEED_EVAL_WEIGHT || !impl->Weight)) { 305940a72f1SSebastian Grimberg CeedInt n_array[MAGMA_NONTENSOR_KERNEL_INSTANCES] = {MAGMA_NONTENSOR_KERNEL_N_VALUES}; 3067251047cSSebastian Grimberg CeedInt diff = abs(n_array[iN] - N), idiff; 30738293ee6SJeremy L Thompson 308023b8a51Sabdelfattah83 for (CeedInt in = iN + 1; in < MAGMA_NONTENSOR_KERNEL_INSTANCES; in++) { 309940a72f1SSebastian Grimberg idiff = abs(n_array[in] - N); 310023b8a51Sabdelfattah83 if (idiff < diff) { 311023b8a51Sabdelfattah83 iN = in; 312023b8a51Sabdelfattah83 diff = idiff; 313868539c2SNatalie Beams } 31480a9ef05SNatalie Beams } 31580a9ef05SNatalie Beams 316940a72f1SSebastian Grimberg if (!impl->NB_interp[iN]) { 3179d15e85bSSebastian Grimberg CeedFESpace fe_space; 3189d15e85bSSebastian Grimberg CeedInt q_comp_interp, q_comp_deriv; 319940a72f1SSebastian Grimberg Ceed ceed_delegate; 32022070f95SJeremy L Thompson char *basis_kernel_source; 32122070f95SJeremy L Thompson const char *basis_kernel_path, *weight_kernel_path; 322509d4af6SJeremy L Thompson char **file_paths = NULL; 323509d4af6SJeremy L Thompson CeedInt num_file_paths = 0; 324940a72f1SSebastian Grimberg magma_int_t arch = magma_getdevice_arch(); 32580a9ef05SNatalie Beams 326940a72f1SSebastian Grimberg // Tuning parameters for NB 3279d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetFESpace(basis, &fe_space)); 3289d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 3299d15e85bSSebastian Grimberg switch (fe_space) { 3309d15e85bSSebastian Grimberg case CEED_FE_SPACE_H1: 3319d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_deriv)); 3329d15e85bSSebastian Grimberg break; 3339d15e85bSSebastian Grimberg case CEED_FE_SPACE_HDIV: 3349d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_deriv)); 3359d15e85bSSebastian Grimberg break; 3369d15e85bSSebastian Grimberg case CEED_FE_SPACE_HCURL: 3379d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_deriv)); 3389d15e85bSSebastian Grimberg break; 3399d15e85bSSebastian Grimberg } 3409d15e85bSSebastian Grimberg impl->NB_interp[iN] = nontensor_rtc_get_nb(arch, 'n', q_comp_interp, P, Q, n_array[iN]); 3419d15e85bSSebastian Grimberg impl->NB_interp_t[iN] = nontensor_rtc_get_nb(arch, 't', q_comp_interp, P, Q, n_array[iN]); 3429d15e85bSSebastian Grimberg impl->NB_deriv[iN] = nontensor_rtc_get_nb(arch, 'n', q_comp_deriv, P, Q, n_array[iN]); 3439d15e85bSSebastian Grimberg impl->NB_deriv_t[iN] = nontensor_rtc_get_nb(arch, 't', q_comp_deriv, P, Q, n_array[iN]); 344023b8a51Sabdelfattah83 345940a72f1SSebastian Grimberg // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data 346940a72f1SSebastian Grimberg CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate)); 347023b8a51Sabdelfattah83 348940a72f1SSebastian Grimberg // Compile kernels 3499d15e85bSSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-interp-deriv-nontensor.h", &basis_kernel_path)); 350940a72f1SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 351509d4af6SJeremy L Thompson CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, basis_kernel_path, &num_file_paths, &file_paths, &basis_kernel_source)); 3527251047cSSebastian Grimberg if (!impl->Weight) { 3537251047cSSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path)); 354509d4af6SJeremy L Thompson CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, weight_kernel_path, &num_file_paths, &file_paths, &basis_kernel_source)); 3557251047cSSebastian Grimberg } 356940a72f1SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 3577251047cSSebastian Grimberg CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[iN], 8, "BASIS_Q_COMP_INTERP", q_comp_interp, 3589d15e85bSSebastian Grimberg "BASIS_Q_COMP_DERIV", q_comp_deriv, "BASIS_P", P, "BASIS_Q", Q, "BASIS_NB_INTERP_N", impl->NB_interp[iN], 3599d15e85bSSebastian Grimberg "BASIS_NB_INTERP_T", impl->NB_interp_t[iN], "BASIS_NB_DERIV_N", impl->NB_deriv[iN], "BASIS_NB_DERIV_T", 3609d15e85bSSebastian Grimberg impl->NB_deriv_t[iN])); 3617251047cSSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_interp_nontensor_n", &impl->Interp[iN])); 3627251047cSSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_interp_nontensor_t", &impl->InterpTranspose[iN])); 363db2becc9SJeremy L Thompson CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_interp_nontensor_ta", &impl->InterpTransposeAdd[iN])); 3647251047cSSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_deriv_nontensor_n", &impl->Deriv[iN])); 3657251047cSSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_deriv_nontensor_t", &impl->DerivTranspose[iN])); 366db2becc9SJeremy L Thompson CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_deriv_nontensor_ta", &impl->DerivTransposeAdd[iN])); 3677251047cSSebastian Grimberg if (!impl->Weight) { 3687251047cSSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_weight_nontensor", &impl->Weight)); 3697251047cSSebastian Grimberg CeedCallBackend(CeedFree(&weight_kernel_path)); 3707251047cSSebastian Grimberg } 3719d15e85bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_path)); 372940a72f1SSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_source)); 3735a5594ffSJeremy L Thompson for (CeedInt i = 0; i < num_file_paths; i++) CeedCallBackend(CeedFree(&file_paths[i])); 3745a5594ffSJeremy L Thompson CeedCallBackend(CeedFree(&file_paths)); 3759bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed_delegate)); 376940a72f1SSebastian Grimberg } 3777251047cSSebastian Grimberg } 3787251047cSSebastian Grimberg 3797251047cSSebastian Grimberg // Apply basis operation 3807251047cSSebastian Grimberg if (e_mode != CEED_EVAL_WEIGHT) { 3817251047cSSebastian Grimberg const CeedScalar *d_b = NULL; 3827251047cSSebastian Grimberg CeedInt q_comp, NB, M, K; 3837251047cSSebastian Grimberg CeedMagmaFunction Kernel; 3847251047cSSebastian Grimberg 3857251047cSSebastian Grimberg switch (e_mode) { 3867251047cSSebastian Grimberg case CEED_EVAL_INTERP: 3877251047cSSebastian Grimberg d_b = impl->d_interp; 3887251047cSSebastian Grimberg break; 3897251047cSSebastian Grimberg case CEED_EVAL_GRAD: 3907251047cSSebastian Grimberg d_b = impl->d_grad; 3917251047cSSebastian Grimberg break; 3927251047cSSebastian Grimberg case CEED_EVAL_DIV: 3937251047cSSebastian Grimberg d_b = impl->d_div; 3947251047cSSebastian Grimberg break; 3957251047cSSebastian Grimberg case CEED_EVAL_CURL: 3967251047cSSebastian Grimberg d_b = impl->d_curl; 3977251047cSSebastian Grimberg break; 3987251047cSSebastian Grimberg // LCOV_EXCL_START 3997251047cSSebastian Grimberg case CEED_EVAL_WEIGHT: 4007251047cSSebastian Grimberg case CEED_EVAL_NONE: 401bcbe1c99SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s does not make sense in this context", CeedEvalModes[e_mode]); 4027251047cSSebastian Grimberg // LCOV_EXCL_STOP 4037251047cSSebastian Grimberg } 4047251047cSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, e_mode, &q_comp)); 4057251047cSSebastian Grimberg M = (t_mode == CEED_TRANSPOSE) ? P : Q, K = (t_mode == CEED_TRANSPOSE) ? Q : P; 4067251047cSSebastian Grimberg 4077251047cSSebastian Grimberg if (P <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P && Q <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) { 4089d15e85bSSebastian Grimberg if (e_mode == CEED_EVAL_INTERP) { 4099d15e85bSSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 410db2becc9SJeremy L Thompson Kernel = apply_add ? impl->InterpTransposeAdd[iN] : impl->InterpTranspose[iN]; 4119d15e85bSSebastian Grimberg NB = impl->NB_interp_t[iN]; 4129d15e85bSSebastian Grimberg } else { 4139d15e85bSSebastian Grimberg Kernel = impl->Interp[iN]; 4149d15e85bSSebastian Grimberg NB = impl->NB_interp[iN]; 4159d15e85bSSebastian Grimberg } 4169d15e85bSSebastian Grimberg } else { 4179d15e85bSSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 418db2becc9SJeremy L Thompson Kernel = apply_add ? impl->DerivTransposeAdd[iN] : impl->DerivTranspose[iN]; 4199d15e85bSSebastian Grimberg NB = impl->NB_deriv_t[iN]; 4209d15e85bSSebastian Grimberg } else { 4219d15e85bSSebastian Grimberg Kernel = impl->Deriv[iN]; 4229d15e85bSSebastian Grimberg NB = impl->NB_deriv[iN]; 4239d15e85bSSebastian Grimberg } 4249d15e85bSSebastian Grimberg } 425940a72f1SSebastian Grimberg CeedInt num_t_col = MAGMA_BASIS_NTCOL(M, MAGMA_MAXTHREADS_1D); 4269d15e85bSSebastian Grimberg CeedInt grid = CeedDivUpInt(N, num_t_col * NB); 427833aa127SSebastian Grimberg CeedInt shared_mem_A = P * Q * sizeof(CeedScalar); 428940a72f1SSebastian Grimberg CeedInt shared_mem_B = num_t_col * K * NB * sizeof(CeedScalar); 429833aa127SSebastian Grimberg CeedInt shared_mem = (t_mode != CEED_TRANSPOSE && q_comp > 1) ? (shared_mem_A + shared_mem_B) : CeedIntMax(shared_mem_A, shared_mem_B); 4309d15e85bSSebastian Grimberg void *args[] = {&N, &d_b, &d_u, &d_v}; 431940a72f1SSebastian Grimberg 432*e9c76bddSJeremy L Thompson CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, Kernel, NULL, grid, M, num_t_col, 1, shared_mem, args)); 4339d15e85bSSebastian Grimberg } else { 4349d15e85bSSebastian Grimberg for (CeedInt d = 0; d < q_comp; d++) { 43538293ee6SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 436db2becc9SJeremy L Thompson const CeedScalar beta = (apply_add || (d > 0)) ? 1.0 : 0.0; 4379d15e85bSSebastian Grimberg magma_gemm_nontensor(MagmaNoTrans, MagmaNoTrans, P, N, Q, 1.0, d_b + d * P * Q, P, d_u + d * N * Q, Q, beta, d_v, P, data->queue); 438940a72f1SSebastian Grimberg } else { 4399d15e85bSSebastian Grimberg magma_gemm_nontensor(MagmaTrans, MagmaNoTrans, Q, N, P, 1.0, d_b + d * P * Q, P, d_u, P, 0.0, d_v + d * N * Q, Q, data->queue); 440940a72f1SSebastian Grimberg } 441940a72f1SSebastian Grimberg } 442940a72f1SSebastian Grimberg } 443940a72f1SSebastian Grimberg } else { 444940a72f1SSebastian Grimberg CeedCheck(t_mode != CEED_TRANSPOSE, ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 445097cc795SJames Wright CeedCheck(impl->d_q_weight, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weight not set", CeedEvalModes[e_mode]); 446940a72f1SSebastian Grimberg CeedInt num_t_col = MAGMA_BASIS_NTCOL(Q, MAGMA_MAXTHREADS_1D); 447940a72f1SSebastian Grimberg CeedInt grid = CeedDivUpInt(num_elem, num_t_col); 448940a72f1SSebastian Grimberg CeedInt shared_mem = Q * sizeof(CeedScalar) + num_t_col * Q * sizeof(CeedScalar); 4499d15e85bSSebastian Grimberg void *args[] = {&num_elem, &impl->d_q_weight, &d_v}; 450868539c2SNatalie Beams 451*e9c76bddSJeremy L Thompson CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Weight, NULL, grid, Q, num_t_col, 1, shared_mem, args)); 452940a72f1SSebastian Grimberg } 453940a72f1SSebastian Grimberg 454940a72f1SSebastian Grimberg // Must sync to ensure completeness 455e0582403Sabdelfattah83 ceed_magma_queue_sync(data->queue); 456e0582403Sabdelfattah83 457940a72f1SSebastian Grimberg // Restore vectors 45838293ee6SJeremy L Thompson if (e_mode != CEED_EVAL_WEIGHT) { 459940a72f1SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 460868539c2SNatalie Beams } 461940a72f1SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 4629bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 463e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 464868539c2SNatalie Beams } 465868539c2SNatalie Beams 466db2becc9SJeremy L Thompson static int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u, 467db2becc9SJeremy L Thompson CeedVector v) { 468db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyNonTensorCore_Magma(basis, false, num_elem, t_mode, e_mode, u, v)); 469db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 470db2becc9SJeremy L Thompson } 471db2becc9SJeremy L Thompson 472db2becc9SJeremy L Thompson static int CeedBasisApplyAddNonTensor_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u, 473db2becc9SJeremy L Thompson CeedVector v) { 474db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyNonTensorCore_Magma(basis, true, num_elem, t_mode, e_mode, u, v)); 475db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 476db2becc9SJeremy L Thompson } 477db2becc9SJeremy L Thompson 478940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 479940a72f1SSebastian Grimberg // Destroy tensor basis 480940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 481940a72f1SSebastian Grimberg static int CeedBasisDestroy_Magma(CeedBasis basis) { 482f6af633fSnbeams Ceed ceed; 48338293ee6SJeremy L Thompson CeedBasis_Magma *impl; 48438293ee6SJeremy L Thompson 4852b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 486940a72f1SSebastian Grimberg CeedCallBackend(CeedBasisGetData(basis, &impl)); 487e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP 4882b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(impl->module)); 489f6af633fSnbeams #else 4902b730f8bSJeremy L Thompson CeedCallCuda(ceed, cuModuleUnload(impl->module)); 491f6af633fSnbeams #endif 492940a72f1SSebastian Grimberg CeedCallBackend(magma_free(impl->d_interp_1d)); 493940a72f1SSebastian Grimberg CeedCallBackend(magma_free(impl->d_grad_1d)); 494097cc795SJames Wright if (impl->d_q_weight_1d) CeedCallBackend(magma_free(impl->d_q_weight_1d)); 4952b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 4969bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 497e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4987f5b9731SStan Tomov } 4997f5b9731SStan Tomov 500940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 501940a72f1SSebastian Grimberg // Destroy non-tensor basis 502940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 503940a72f1SSebastian Grimberg static int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) { 504023b8a51Sabdelfattah83 Ceed ceed; 50538293ee6SJeremy L Thompson CeedBasisNonTensor_Magma *impl; 50638293ee6SJeremy L Thompson 507940a72f1SSebastian Grimberg CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 50838293ee6SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &impl)); 509940a72f1SSebastian Grimberg for (CeedInt in = 0; in < MAGMA_NONTENSOR_KERNEL_INSTANCES; in++) { 5107251047cSSebastian Grimberg if (impl->module[in]) { 511940a72f1SSebastian Grimberg #ifdef CEED_MAGMA_USE_HIP 5127251047cSSebastian Grimberg CeedCallHip(ceed, hipModuleUnload(impl->module[in])); 513940a72f1SSebastian Grimberg #else 5147251047cSSebastian Grimberg CeedCallCuda(ceed, cuModuleUnload(impl->module[in])); 515940a72f1SSebastian Grimberg #endif 516940a72f1SSebastian Grimberg } 517940a72f1SSebastian Grimberg } 51838293ee6SJeremy L Thompson CeedCallBackend(magma_free(impl->d_interp)); 51938293ee6SJeremy L Thompson CeedCallBackend(magma_free(impl->d_grad)); 5209d15e85bSSebastian Grimberg CeedCallBackend(magma_free(impl->d_div)); 5219d15e85bSSebastian Grimberg CeedCallBackend(magma_free(impl->d_curl)); 522097cc795SJames Wright if (impl->d_q_weight) CeedCallBackend(magma_free(impl->d_q_weight)); 5232b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 5249bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 525e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 526868539c2SNatalie Beams } 527868539c2SNatalie Beams 528940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 529940a72f1SSebastian Grimberg // Create tensor 530940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 531940a72f1SSebastian Grimberg int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 53238293ee6SJeremy L Thompson const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) { 53338293ee6SJeremy L Thompson Ceed ceed, ceed_delegate; 53438293ee6SJeremy L Thompson Ceed_Magma *data; 53522070f95SJeremy L Thompson char *basis_kernel_source; 53622070f95SJeremy L Thompson const char *interp_kernel_path, *grad_kernel_path, *weight_kernel_path; 537509d4af6SJeremy L Thompson char **file_paths = NULL; 538509d4af6SJeremy L Thompson CeedInt num_file_paths = 0; 539940a72f1SSebastian Grimberg CeedInt num_comp; 5407f5b9731SStan Tomov CeedBasis_Magma *impl; 54138293ee6SJeremy L Thompson 5422b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 5432b730f8bSJeremy L Thompson CeedCallBackend(CeedGetData(ceed, &data)); 544940a72f1SSebastian Grimberg CeedCallBackend(CeedCalloc(1, &impl)); 545e0582403Sabdelfattah83 546940a72f1SSebastian Grimberg // Copy basis data to GPU 547097cc795SJames Wright if (q_weight_1d) { 548940a72f1SSebastian Grimberg CeedCallBackend(magma_malloc((void **)&impl->d_q_weight_1d, Q_1d * sizeof(q_weight_1d[0]))); 549940a72f1SSebastian Grimberg magma_setvector(Q_1d, sizeof(q_weight_1d[0]), q_weight_1d, 1, impl->d_q_weight_1d, 1, data->queue); 550097cc795SJames Wright } 55138293ee6SJeremy L Thompson CeedCallBackend(magma_malloc((void **)&impl->d_interp_1d, Q_1d * P_1d * sizeof(interp_1d[0]))); 55238293ee6SJeremy L Thompson magma_setvector(Q_1d * P_1d, sizeof(interp_1d[0]), interp_1d, 1, impl->d_interp_1d, 1, data->queue); 55338293ee6SJeremy L Thompson CeedCallBackend(magma_malloc((void **)&impl->d_grad_1d, Q_1d * P_1d * sizeof(grad_1d[0]))); 55438293ee6SJeremy L Thompson magma_setvector(Q_1d * P_1d, sizeof(grad_1d[0]), grad_1d, 1, impl->d_grad_1d, 1, data->queue); 5557f5b9731SStan Tomov 556940a72f1SSebastian Grimberg // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data 557940a72f1SSebastian Grimberg CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate)); 558940a72f1SSebastian Grimberg 559940a72f1SSebastian Grimberg // Compile kernels 560940a72f1SSebastian Grimberg CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 561940a72f1SSebastian Grimberg { 562940a72f1SSebastian Grimberg char *interp_kernel_name_base = "ceed/jit-source/magma/magma-basis-interp"; 563940a72f1SSebastian Grimberg CeedInt interp_kernel_name_len = strlen(interp_kernel_name_base) + 6; 564940a72f1SSebastian Grimberg char interp_kernel_name[interp_kernel_name_len]; 565940a72f1SSebastian Grimberg 566940a72f1SSebastian Grimberg snprintf(interp_kernel_name, interp_kernel_name_len, "%s-%" CeedInt_FMT "d.h", interp_kernel_name_base, dim); 567940a72f1SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, interp_kernel_name, &interp_kernel_path)); 568940a72f1SSebastian Grimberg } 569940a72f1SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 570509d4af6SJeremy L Thompson CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, interp_kernel_path, &num_file_paths, &file_paths, &basis_kernel_source)); 571940a72f1SSebastian Grimberg { 572940a72f1SSebastian Grimberg char *grad_kernel_name_base = "ceed/jit-source/magma/magma-basis-grad"; 573940a72f1SSebastian Grimberg CeedInt grad_kernel_name_len = strlen(grad_kernel_name_base) + 6; 574940a72f1SSebastian Grimberg char grad_kernel_name[grad_kernel_name_len]; 575940a72f1SSebastian Grimberg 576940a72f1SSebastian Grimberg snprintf(grad_kernel_name, grad_kernel_name_len, "%s-%" CeedInt_FMT "d.h", grad_kernel_name_base, dim); 577940a72f1SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, grad_kernel_name, &grad_kernel_path)); 578940a72f1SSebastian Grimberg } 579509d4af6SJeremy L Thompson CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, grad_kernel_path, &num_file_paths, &file_paths, &basis_kernel_source)); 580940a72f1SSebastian Grimberg { 581940a72f1SSebastian Grimberg char *weight_kernel_name_base = "ceed/jit-source/magma/magma-basis-weight"; 582940a72f1SSebastian Grimberg CeedInt weight_kernel_name_len = strlen(weight_kernel_name_base) + 6; 583940a72f1SSebastian Grimberg char weight_kernel_name[weight_kernel_name_len]; 584940a72f1SSebastian Grimberg 585940a72f1SSebastian Grimberg snprintf(weight_kernel_name, weight_kernel_name_len, "%s-%" CeedInt_FMT "d.h", weight_kernel_name_base, dim); 586940a72f1SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, weight_kernel_name, &weight_kernel_path)); 587940a72f1SSebastian Grimberg } 588509d4af6SJeremy L Thompson CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, weight_kernel_path, &num_file_paths, &file_paths, &basis_kernel_source)); 589940a72f1SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 590940a72f1SSebastian Grimberg CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module, 5, "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_P", 591940a72f1SSebastian Grimberg P_1d, "BASIS_Q", Q_1d, "BASIS_MAX_P_Q", CeedIntMax(P_1d, Q_1d))); 592940a72f1SSebastian Grimberg switch (dim) { 593940a72f1SSebastian Grimberg case 1: 594940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_1d_kernel", &impl->Interp)); 595940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_1d_kernel", &impl->InterpTranspose)); 596db2becc9SJeremy L Thompson CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpta_1d_kernel", &impl->InterpTransposeAdd)); 597940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_1d_kernel", &impl->Grad)); 598940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_1d_kernel", &impl->GradTranspose)); 599db2becc9SJeremy L Thompson CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradta_1d_kernel", &impl->GradTransposeAdd)); 600940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_1d_kernel", &impl->Weight)); 601940a72f1SSebastian Grimberg break; 602940a72f1SSebastian Grimberg case 2: 603940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_2d_kernel", &impl->Interp)); 604940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_2d_kernel", &impl->InterpTranspose)); 605db2becc9SJeremy L Thompson CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpta_2d_kernel", &impl->InterpTransposeAdd)); 606940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_2d_kernel", &impl->Grad)); 607940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_2d_kernel", &impl->GradTranspose)); 608db2becc9SJeremy L Thompson CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradta_2d_kernel", &impl->GradTransposeAdd)); 609940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_2d_kernel", &impl->Weight)); 610940a72f1SSebastian Grimberg break; 611940a72f1SSebastian Grimberg case 3: 612940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_3d_kernel", &impl->Interp)); 613940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_3d_kernel", &impl->InterpTranspose)); 614db2becc9SJeremy L Thompson CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpta_3d_kernel", &impl->InterpTransposeAdd)); 615940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_3d_kernel", &impl->Grad)); 616940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_3d_kernel", &impl->GradTranspose)); 617db2becc9SJeremy L Thompson CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradta_3d_kernel", &impl->GradTransposeAdd)); 618940a72f1SSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_3d_kernel", &impl->Weight)); 619940a72f1SSebastian Grimberg break; 620940a72f1SSebastian Grimberg } 621940a72f1SSebastian Grimberg CeedCallBackend(CeedFree(&interp_kernel_path)); 622940a72f1SSebastian Grimberg CeedCallBackend(CeedFree(&grad_kernel_path)); 623940a72f1SSebastian Grimberg CeedCallBackend(CeedFree(&weight_kernel_path)); 624940a72f1SSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_source)); 6255a5594ffSJeremy L Thompson for (CeedInt i = 0; i < num_file_paths; i++) CeedCallBackend(CeedFree(&file_paths[i])); 6265a5594ffSJeremy L Thompson CeedCallBackend(CeedFree(&file_paths)); 6277f5b9731SStan Tomov 6282b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisSetData(basis, impl)); 629940a72f1SSebastian Grimberg 630940a72f1SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Magma)); 631db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Magma)); 63214950a8eSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Magma)); 633940a72f1SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Magma)); 6349bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 6359bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed_delegate)); 636e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 6377f5b9731SStan Tomov } 6387f5b9731SStan Tomov 639940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 640940a72f1SSebastian Grimberg // Create non-tensor H^1 641940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 642940a72f1SSebastian Grimberg int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad, 64338293ee6SJeremy L Thompson const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 6447251047cSSebastian Grimberg Ceed ceed; 645e0582403Sabdelfattah83 Ceed_Magma *data; 64638293ee6SJeremy L Thompson CeedBasisNonTensor_Magma *impl; 64738293ee6SJeremy L Thompson 64838293ee6SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 6492b730f8bSJeremy L Thompson CeedCallBackend(CeedGetData(ceed, &data)); 6502b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 651023b8a51Sabdelfattah83 652940a72f1SSebastian Grimberg // Copy basis data to GPU 653097cc795SJames Wright if (q_weight) { 65438293ee6SJeremy L Thompson CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0]))); 65538293ee6SJeremy L Thompson magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue); 656097cc795SJames Wright } 6579d15e85bSSebastian Grimberg if (interp) { 6589d15e85bSSebastian Grimberg CeedInt q_comp_interp; 6599d15e85bSSebastian Grimberg 6609d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 6619d15e85bSSebastian Grimberg CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0]))); 6629d15e85bSSebastian Grimberg magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue); 6639d15e85bSSebastian Grimberg } 6649d15e85bSSebastian Grimberg if (grad) { 6659d15e85bSSebastian Grimberg CeedInt q_comp_grad; 6669d15e85bSSebastian Grimberg 6679d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad)); 6689d15e85bSSebastian Grimberg CeedCallBackend(magma_malloc((void **)&impl->d_grad, num_qpts * num_nodes * q_comp_grad * sizeof(grad[0]))); 6699d15e85bSSebastian Grimberg magma_setvector(num_qpts * num_nodes * q_comp_grad, sizeof(grad[0]), grad, 1, impl->d_grad, 1, data->queue); 6709d15e85bSSebastian Grimberg } 6719d15e85bSSebastian Grimberg 6727251047cSSebastian Grimberg // Compile the weight kernel if it won't be compiled later on 6737251047cSSebastian Grimberg if (num_nodes > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P || num_qpts > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) { 6747251047cSSebastian Grimberg Ceed ceed_delegate; 67522070f95SJeremy L Thompson char *basis_kernel_source; 67622070f95SJeremy L Thompson const char *weight_kernel_path; 6777251047cSSebastian Grimberg 6789d15e85bSSebastian Grimberg // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data 6799d15e85bSSebastian Grimberg CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate)); 6809d15e85bSSebastian Grimberg 6819d15e85bSSebastian Grimberg // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply) 6829d15e85bSSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path)); 6839d15e85bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 6849d15e85bSSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source)); 6859d15e85bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 6867251047cSSebastian Grimberg CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[0], 1, "BASIS_Q", num_qpts)); 6877251047cSSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[0], "magma_weight_nontensor", &impl->Weight)); 6889d15e85bSSebastian Grimberg CeedCallBackend(CeedFree(&weight_kernel_path)); 6899d15e85bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_source)); 6909bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed_delegate)); 6917251047cSSebastian Grimberg } 6929d15e85bSSebastian Grimberg 6939d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisSetData(basis, impl)); 6949d15e85bSSebastian Grimberg 6959d15e85bSSebastian Grimberg // Register backend functions 6969d15e85bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma)); 697db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Magma)); 6989d15e85bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma)); 6999bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 7009d15e85bSSebastian Grimberg return CEED_ERROR_SUCCESS; 7019d15e85bSSebastian Grimberg } 7029d15e85bSSebastian Grimberg 7039d15e85bSSebastian Grimberg //------------------------------------------------------------------------------ 7049d15e85bSSebastian Grimberg // Create non-tensor H(div) 7059d15e85bSSebastian Grimberg //------------------------------------------------------------------------------ 7069d15e85bSSebastian Grimberg int CeedBasisCreateHdiv_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 7079d15e85bSSebastian Grimberg const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 7087251047cSSebastian Grimberg Ceed ceed; 7099d15e85bSSebastian Grimberg Ceed_Magma *data; 7109d15e85bSSebastian Grimberg CeedBasisNonTensor_Magma *impl; 7119d15e85bSSebastian Grimberg 7129d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 7139d15e85bSSebastian Grimberg CeedCallBackend(CeedGetData(ceed, &data)); 7149d15e85bSSebastian Grimberg CeedCallBackend(CeedCalloc(1, &impl)); 7159d15e85bSSebastian Grimberg 7169d15e85bSSebastian Grimberg // Copy basis data to GPU 717097cc795SJames Wright if (q_weight) { 7189d15e85bSSebastian Grimberg CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0]))); 7199d15e85bSSebastian Grimberg magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue); 720097cc795SJames Wright } 7219d15e85bSSebastian Grimberg if (interp) { 7229d15e85bSSebastian Grimberg CeedInt q_comp_interp; 7239d15e85bSSebastian Grimberg 7249d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 7259d15e85bSSebastian Grimberg CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0]))); 7269d15e85bSSebastian Grimberg magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue); 7279d15e85bSSebastian Grimberg } 7289d15e85bSSebastian Grimberg if (div) { 7299d15e85bSSebastian Grimberg CeedInt q_comp_div; 7309d15e85bSSebastian Grimberg 7319d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div)); 7329d15e85bSSebastian Grimberg CeedCallBackend(magma_malloc((void **)&impl->d_div, num_qpts * num_nodes * q_comp_div * sizeof(div[0]))); 7339d15e85bSSebastian Grimberg magma_setvector(num_qpts * num_nodes * q_comp_div, sizeof(div[0]), div, 1, impl->d_div, 1, data->queue); 7349d15e85bSSebastian Grimberg } 7359d15e85bSSebastian Grimberg 7367251047cSSebastian Grimberg // Compile the weight kernel if it won't be compiled later on 7377251047cSSebastian Grimberg if (num_nodes > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P || num_qpts > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) { 7387251047cSSebastian Grimberg Ceed ceed_delegate; 73922070f95SJeremy L Thompson char *basis_kernel_source; 74022070f95SJeremy L Thompson const char *weight_kernel_path; 7417251047cSSebastian Grimberg 7429d15e85bSSebastian Grimberg // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data 7439d15e85bSSebastian Grimberg CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate)); 7449d15e85bSSebastian Grimberg 7459d15e85bSSebastian Grimberg // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply) 7469d15e85bSSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path)); 7479d15e85bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 7489d15e85bSSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source)); 7499d15e85bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 7507251047cSSebastian Grimberg CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[0], 1, "BASIS_Q", num_qpts)); 7517251047cSSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[0], "magma_weight_nontensor", &impl->Weight)); 7529d15e85bSSebastian Grimberg CeedCallBackend(CeedFree(&weight_kernel_path)); 7539d15e85bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_source)); 7549bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed_delegate)); 7557251047cSSebastian Grimberg } 7569d15e85bSSebastian Grimberg 7579d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisSetData(basis, impl)); 7589d15e85bSSebastian Grimberg 7599d15e85bSSebastian Grimberg // Register backend functions 7609d15e85bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma)); 761db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Magma)); 7629d15e85bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma)); 7639bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 7649d15e85bSSebastian Grimberg return CEED_ERROR_SUCCESS; 7659d15e85bSSebastian Grimberg } 7669d15e85bSSebastian Grimberg 7679d15e85bSSebastian Grimberg //------------------------------------------------------------------------------ 7689d15e85bSSebastian Grimberg // Create non-tensor H(curl) 7699d15e85bSSebastian Grimberg //------------------------------------------------------------------------------ 7709d15e85bSSebastian Grimberg int CeedBasisCreateHcurl_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 7719d15e85bSSebastian Grimberg const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 7727251047cSSebastian Grimberg Ceed ceed; 7739d15e85bSSebastian Grimberg Ceed_Magma *data; 7749d15e85bSSebastian Grimberg CeedBasisNonTensor_Magma *impl; 7759d15e85bSSebastian Grimberg 7769d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 7779d15e85bSSebastian Grimberg CeedCallBackend(CeedGetData(ceed, &data)); 7789d15e85bSSebastian Grimberg CeedCallBackend(CeedCalloc(1, &impl)); 7799d15e85bSSebastian Grimberg 7809d15e85bSSebastian Grimberg // Copy basis data to GPU 781097cc795SJames Wright if (q_weight) { 7829d15e85bSSebastian Grimberg CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0]))); 7839d15e85bSSebastian Grimberg magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue); 784097cc795SJames Wright } 7859d15e85bSSebastian Grimberg if (interp) { 7869d15e85bSSebastian Grimberg CeedInt q_comp_interp; 7879d15e85bSSebastian Grimberg 7889d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 7899d15e85bSSebastian Grimberg CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0]))); 7909d15e85bSSebastian Grimberg magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue); 7919d15e85bSSebastian Grimberg } 7929d15e85bSSebastian Grimberg if (curl) { 7939d15e85bSSebastian Grimberg CeedInt q_comp_curl; 7949d15e85bSSebastian Grimberg 7959d15e85bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl)); 7969d15e85bSSebastian Grimberg CeedCallBackend(magma_malloc((void **)&impl->d_curl, num_qpts * num_nodes * q_comp_curl * sizeof(curl[0]))); 7979d15e85bSSebastian Grimberg magma_setvector(num_qpts * num_nodes * q_comp_curl, sizeof(curl[0]), curl, 1, impl->d_curl, 1, data->queue); 7989d15e85bSSebastian Grimberg } 799940a72f1SSebastian Grimberg 8007251047cSSebastian Grimberg // Compile the weight kernel if it won't be compiled later on 8017251047cSSebastian Grimberg if (num_nodes > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P || num_qpts > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) { 8027251047cSSebastian Grimberg Ceed ceed_delegate; 80322070f95SJeremy L Thompson char *basis_kernel_source; 80422070f95SJeremy L Thompson const char *weight_kernel_path; 8057251047cSSebastian Grimberg 806940a72f1SSebastian Grimberg // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data 807940a72f1SSebastian Grimberg CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate)); 808940a72f1SSebastian Grimberg 809940a72f1SSebastian Grimberg // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply) 810940a72f1SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path)); 811940a72f1SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 812940a72f1SSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source)); 813940a72f1SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 8147251047cSSebastian Grimberg CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[0], 1, "BASIS_Q", num_qpts)); 8157251047cSSebastian Grimberg CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[0], "magma_weight_nontensor", &impl->Weight)); 816940a72f1SSebastian Grimberg CeedCallBackend(CeedFree(&weight_kernel_path)); 817940a72f1SSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_source)); 8189bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed_delegate)); 8197251047cSSebastian Grimberg } 820868539c2SNatalie Beams 821023b8a51Sabdelfattah83 CeedCallBackend(CeedBasisSetData(basis, impl)); 822940a72f1SSebastian Grimberg 823940a72f1SSebastian Grimberg // Register backend functions 824940a72f1SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma)); 825db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Magma)); 826940a72f1SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma)); 8279bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 828e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 8297f5b9731SStan Tomov } 830940a72f1SSebastian Grimberg 831940a72f1SSebastian Grimberg //------------------------------------------------------------------------------ 832