1*5aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 34444f328STzanio // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 54444f328STzanio // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 74444f328STzanio 890104f39SStan Tomov // magma functions specific to ceed 994b7b29bSJeremy L Thompson #ifndef CEED_MAGMA_H 1094b7b29bSJeremy L Thompson #define CEED_MAGMA_H 1190104f39SStan Tomov 1249aac155SJeremy L Thompson #include <ceed.h> 13ec3da8bcSJed Brown #include <ceed/backend.h> 14e0582403Sabdelfattah83 #include <magma_v2.h> 15e0582403Sabdelfattah83 16f6af633fSnbeams #define MAGMA_MAXTHREADS_1D 128 17f6af633fSnbeams #define MAGMA_MAXTHREADS_2D 128 18f6af633fSnbeams #define MAGMA_MAXTHREADS_3D 64 19023b8a51Sabdelfattah83 20940a72f1SSebastian Grimberg // Define macro for determining number of threads in y-direction for basis kernels 21f6af633fSnbeams #define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x))) 22023b8a51Sabdelfattah83 23940a72f1SSebastian Grimberg // Define macros for non-tensor kernel instances 24940a72f1SSebastian Grimberg #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P 40 25940a72f1SSebastian Grimberg #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q 40 267c7f2ed8SSebastian Grimberg #define MAGMA_NONTENSOR_KERNEL_INSTANCES 7 277c7f2ed8SSebastian Grimberg #define MAGMA_NONTENSOR_KERNEL_N_VALUES 1024, 5120, 10240, 51200, 102400, 512000, 1024000 28023b8a51Sabdelfattah83 29e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP 30c42f38b1Snbeams typedef hipModule_t CeedMagmaModule; 31c42f38b1Snbeams typedef hipFunction_t CeedMagmaFunction; 32eb7e6cafSJeremy L Thompson #define CeedCompileMagma CeedCompile_Hip 33eb7e6cafSJeremy L Thompson #define CeedGetKernelMagma CeedGetKernel_Hip 34eb7e6cafSJeremy L Thompson #define CeedRunKernelMagma CeedRunKernel_Hip 35eb7e6cafSJeremy L Thompson #define CeedRunKernelDimMagma CeedRunKernelDim_Hip 36eb7e6cafSJeremy L Thompson #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Hip 37f6af633fSnbeams #else 38c42f38b1Snbeams typedef CUmodule CeedMagmaModule; 39c42f38b1Snbeams typedef CUfunction CeedMagmaFunction; 40eb7e6cafSJeremy L Thompson #define CeedCompileMagma CeedCompile_Cuda 41eb7e6cafSJeremy L Thompson #define CeedGetKernelMagma CeedGetKernel_Cuda 42eb7e6cafSJeremy L Thompson #define CeedRunKernelMagma CeedRunKernel_Cuda 43eb7e6cafSJeremy L Thompson #define CeedRunKernelDimMagma CeedRunKernelDim_Cuda 44eb7e6cafSJeremy L Thompson #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Cuda 45f6af633fSnbeams #endif 46f6af633fSnbeams 47e0582403Sabdelfattah83 typedef struct { 48c42f38b1Snbeams CeedMagmaModule module; 49940a72f1SSebastian Grimberg CeedMagmaFunction Interp; 50940a72f1SSebastian Grimberg CeedMagmaFunction InterpTranspose; 51940a72f1SSebastian Grimberg CeedMagmaFunction Grad; 52940a72f1SSebastian Grimberg CeedMagmaFunction GradTranspose; 53940a72f1SSebastian Grimberg CeedMagmaFunction Weight; 5438293ee6SJeremy L Thompson CeedScalar *d_interp_1d; 5538293ee6SJeremy L Thompson CeedScalar *d_grad_1d; 5638293ee6SJeremy L Thompson CeedScalar *d_q_weight_1d; 577f5b9731SStan Tomov } CeedBasis_Magma; 587f5b9731SStan Tomov 597f5b9731SStan Tomov typedef struct { 607251047cSSebastian Grimberg CeedMagmaModule module[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 61940a72f1SSebastian Grimberg CeedMagmaFunction Interp[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 62940a72f1SSebastian Grimberg CeedMagmaFunction InterpTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 639d15e85bSSebastian Grimberg CeedMagmaFunction Deriv[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 649d15e85bSSebastian Grimberg CeedMagmaFunction DerivTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 65940a72f1SSebastian Grimberg CeedMagmaFunction Weight; 66940a72f1SSebastian Grimberg CeedInt NB_interp[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_interp_t[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 679d15e85bSSebastian Grimberg CeedInt NB_deriv[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_deriv_t[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 6838293ee6SJeremy L Thompson CeedScalar *d_interp; 6938293ee6SJeremy L Thompson CeedScalar *d_grad; 709d15e85bSSebastian Grimberg CeedScalar *d_div; 719d15e85bSSebastian Grimberg CeedScalar *d_curl; 7238293ee6SJeremy L Thompson CeedScalar *d_q_weight; 73868539c2SNatalie Beams } CeedBasisNonTensor_Magma; 74868539c2SNatalie Beams 7538293ee6SJeremy L Thompson CEED_INTERN int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 7638293ee6SJeremy L Thompson const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis); 77e4e1133fSSebastian Grimberg CEED_INTERN int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 7838293ee6SJeremy L Thompson const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 799d15e85bSSebastian Grimberg CEED_INTERN int CeedBasisCreateHdiv_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 809d15e85bSSebastian Grimberg const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 819d15e85bSSebastian Grimberg CEED_INTERN int CeedBasisCreateHcurl_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 829d15e85bSSebastian Grimberg const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 83868539c2SNatalie Beams 84940a72f1SSebastian Grimberg CEED_INTERN magma_int_t magma_isdevptr(const void *); 85940a72f1SSebastian Grimberg 8658549094SSebastian Grimberg // Comment the line below to use the default magma_is_devptr function 877f5b9731SStan Tomov #define magma_is_devptr magma_isdevptr 887f5b9731SStan Tomov 8958549094SSebastian Grimberg // If magma and cuda/ref are using the null stream, then ceed_magma_queue_sync should do nothing 90e0582403Sabdelfattah83 #define ceed_magma_queue_sync(...) 91e0582403Sabdelfattah83 9294b7b29bSJeremy L Thompson #endif // CEED_MAGMA_H 93