1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 // magma functions specific to ceed 9 #pragma once 10 11 #include <ceed.h> 12 #include <ceed/backend.h> 13 #include <magma_v2.h> 14 15 #define MAGMA_MAXTHREADS_1D 128 16 #define MAGMA_MAXTHREADS_2D 128 17 #define MAGMA_MAXTHREADS_3D 64 18 19 // Define macro for determining number of threads in y-direction for basis kernels 20 #define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x))) 21 22 // Define macros for non-tensor kernel instances 23 #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P 40 24 #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q 40 25 #define MAGMA_NONTENSOR_KERNEL_INSTANCES 7 26 #define MAGMA_NONTENSOR_KERNEL_N_VALUES 1024, 5120, 10240, 51200, 102400, 512000, 1024000 27 28 #ifdef CEED_MAGMA_USE_HIP 29 typedef hipModule_t CeedMagmaModule; 30 typedef hipFunction_t CeedMagmaFunction; 31 #define CeedCompileMagma CeedCompile_Hip 32 #define CeedGetKernelMagma CeedGetKernel_Hip 33 #define CeedRunKernelMagma CeedRunKernel_Hip 34 #define CeedRunKernelDimMagma CeedRunKernelDim_Hip 35 #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Hip 36 #else 37 typedef CUmodule CeedMagmaModule; 38 typedef CUfunction CeedMagmaFunction; 39 #define CeedCompileMagma CeedCompile_Cuda 40 #define CeedGetKernelMagma CeedGetKernel_Cuda 41 #define CeedRunKernelMagma CeedRunKernel_Cuda 42 #define CeedRunKernelDimMagma CeedRunKernelDim_Cuda 43 #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Cuda 44 #endif 45 46 typedef struct { 47 CeedMagmaModule module; 48 CeedMagmaFunction Interp; 49 CeedMagmaFunction InterpTranspose; 50 CeedMagmaFunction InterpTransposeAdd; 51 CeedMagmaFunction Grad; 52 CeedMagmaFunction GradTranspose; 53 CeedMagmaFunction GradTransposeAdd; 54 CeedMagmaFunction Weight; 55 CeedScalar *d_interp_1d; 56 CeedScalar *d_grad_1d; 57 CeedScalar *d_q_weight_1d; 58 } CeedBasis_Magma; 59 60 typedef struct { 61 CeedMagmaModule module[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 62 CeedMagmaFunction Interp[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 63 CeedMagmaFunction InterpTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 64 CeedMagmaFunction InterpTransposeAdd[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 65 CeedMagmaFunction Deriv[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 66 CeedMagmaFunction DerivTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 67 CeedMagmaFunction DerivTransposeAdd[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 68 CeedMagmaFunction Weight; 69 CeedInt NB_interp[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_interp_t[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 70 CeedInt NB_deriv[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_deriv_t[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 71 CeedScalar *d_interp; 72 CeedScalar *d_grad; 73 CeedScalar *d_div; 74 CeedScalar *d_curl; 75 CeedScalar *d_q_weight; 76 } CeedBasisNonTensor_Magma; 77 78 CEED_INTERN int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 79 const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis); 80 CEED_INTERN int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 81 const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 82 CEED_INTERN int CeedBasisCreateHdiv_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 83 const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 84 CEED_INTERN int CeedBasisCreateHcurl_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 85 const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 86 87 CEED_INTERN magma_int_t magma_isdevptr(const void *); 88 89 // Comment the line below to use the default magma_is_devptr function 90 #define magma_is_devptr magma_isdevptr 91 92 // If magma and cuda/ref are using the null stream, then ceed_magma_queue_sync should do nothing 93 #define ceed_magma_queue_sync(...) 94