15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 34444f328STzanio // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 54444f328STzanio // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 74444f328STzanio 890104f39SStan Tomov // magma functions specific to ceed 9509d4af6SJeremy L Thompson #pragma once 1090104f39SStan Tomov 1149aac155SJeremy L Thompson #include <ceed.h> 12ec3da8bcSJed Brown #include <ceed/backend.h> 13e0582403Sabdelfattah83 #include <magma_v2.h> 14e0582403Sabdelfattah83 15f6af633fSnbeams #define MAGMA_MAXTHREADS_1D 128 16f6af633fSnbeams #define MAGMA_MAXTHREADS_2D 128 17f6af633fSnbeams #define MAGMA_MAXTHREADS_3D 64 18023b8a51Sabdelfattah83 19940a72f1SSebastian Grimberg // Define macro for determining number of threads in y-direction for basis kernels 20f6af633fSnbeams #define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x))) 21023b8a51Sabdelfattah83 22940a72f1SSebastian Grimberg // Define macros for non-tensor kernel instances 23940a72f1SSebastian Grimberg #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P 40 24940a72f1SSebastian Grimberg #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q 40 257c7f2ed8SSebastian Grimberg #define MAGMA_NONTENSOR_KERNEL_INSTANCES 7 267c7f2ed8SSebastian Grimberg #define MAGMA_NONTENSOR_KERNEL_N_VALUES 1024, 5120, 10240, 51200, 102400, 512000, 1024000 27023b8a51Sabdelfattah83 28e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP 29c42f38b1Snbeams typedef hipModule_t CeedMagmaModule; 30c42f38b1Snbeams typedef hipFunction_t CeedMagmaFunction; 31eb7e6cafSJeremy L Thompson #define CeedCompileMagma CeedCompile_Hip 32eb7e6cafSJeremy L Thompson #define CeedGetKernelMagma CeedGetKernel_Hip 33eb7e6cafSJeremy L Thompson #define CeedRunKernelMagma CeedRunKernel_Hip 34eb7e6cafSJeremy L Thompson #define CeedRunKernelDimMagma CeedRunKernelDim_Hip 35eb7e6cafSJeremy L Thompson #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Hip 36f6af633fSnbeams #else 37c42f38b1Snbeams typedef CUmodule CeedMagmaModule; 38c42f38b1Snbeams typedef CUfunction CeedMagmaFunction; 39eb7e6cafSJeremy L Thompson #define CeedCompileMagma CeedCompile_Cuda 40eb7e6cafSJeremy L Thompson #define CeedGetKernelMagma CeedGetKernel_Cuda 41eb7e6cafSJeremy L Thompson #define CeedRunKernelMagma CeedRunKernel_Cuda 42eb7e6cafSJeremy L Thompson #define CeedRunKernelDimMagma CeedRunKernelDim_Cuda 43eb7e6cafSJeremy L Thompson #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Cuda 44f6af633fSnbeams #endif 45f6af633fSnbeams 46e0582403Sabdelfattah83 typedef struct { 47c42f38b1Snbeams CeedMagmaModule module; 48940a72f1SSebastian Grimberg CeedMagmaFunction Interp; 49940a72f1SSebastian Grimberg CeedMagmaFunction InterpTranspose; 50*db2becc9SJeremy L Thompson CeedMagmaFunction InterpTransposeAdd; 51940a72f1SSebastian Grimberg CeedMagmaFunction Grad; 52940a72f1SSebastian Grimberg CeedMagmaFunction GradTranspose; 53*db2becc9SJeremy L Thompson CeedMagmaFunction GradTransposeAdd; 54940a72f1SSebastian Grimberg CeedMagmaFunction Weight; 5538293ee6SJeremy L Thompson CeedScalar *d_interp_1d; 5638293ee6SJeremy L Thompson CeedScalar *d_grad_1d; 5738293ee6SJeremy L Thompson CeedScalar *d_q_weight_1d; 587f5b9731SStan Tomov } CeedBasis_Magma; 597f5b9731SStan Tomov 607f5b9731SStan Tomov typedef struct { 617251047cSSebastian Grimberg CeedMagmaModule module[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 62940a72f1SSebastian Grimberg CeedMagmaFunction Interp[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 63940a72f1SSebastian Grimberg CeedMagmaFunction InterpTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 64*db2becc9SJeremy L Thompson CeedMagmaFunction InterpTransposeAdd[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 659d15e85bSSebastian Grimberg CeedMagmaFunction Deriv[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 669d15e85bSSebastian Grimberg CeedMagmaFunction DerivTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 67*db2becc9SJeremy L Thompson CeedMagmaFunction DerivTransposeAdd[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 68940a72f1SSebastian Grimberg CeedMagmaFunction Weight; 69940a72f1SSebastian Grimberg CeedInt NB_interp[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_interp_t[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 709d15e85bSSebastian Grimberg CeedInt NB_deriv[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_deriv_t[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 7138293ee6SJeremy L Thompson CeedScalar *d_interp; 7238293ee6SJeremy L Thompson CeedScalar *d_grad; 739d15e85bSSebastian Grimberg CeedScalar *d_div; 749d15e85bSSebastian Grimberg CeedScalar *d_curl; 7538293ee6SJeremy L Thompson CeedScalar *d_q_weight; 76868539c2SNatalie Beams } CeedBasisNonTensor_Magma; 77868539c2SNatalie Beams 7838293ee6SJeremy L Thompson CEED_INTERN int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 7938293ee6SJeremy L Thompson const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis); 80e4e1133fSSebastian Grimberg CEED_INTERN int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 8138293ee6SJeremy L Thompson const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 829d15e85bSSebastian Grimberg CEED_INTERN int CeedBasisCreateHdiv_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 839d15e85bSSebastian Grimberg const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 849d15e85bSSebastian Grimberg CEED_INTERN int CeedBasisCreateHcurl_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 859d15e85bSSebastian Grimberg const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 86868539c2SNatalie Beams 87940a72f1SSebastian Grimberg CEED_INTERN magma_int_t magma_isdevptr(const void *); 88940a72f1SSebastian Grimberg 8958549094SSebastian Grimberg // Comment the line below to use the default magma_is_devptr function 907f5b9731SStan Tomov #define magma_is_devptr magma_isdevptr 917f5b9731SStan Tomov 9258549094SSebastian Grimberg // If magma and cuda/ref are using the null stream, then ceed_magma_queue_sync should do nothing 93e0582403Sabdelfattah83 #define ceed_magma_queue_sync(...) 94