xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision d4cc18453651bd0f94c1a2e078b2646a92dafdcc)
// Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED:  http://github.com/ceed
74444f328STzanio 
// MAGMA functions specific to CEED
9509d4af6SJeremy L Thompson #pragma once
1090104f39SStan Tomov 
1149aac155SJeremy L Thompson #include <ceed.h>
12ec3da8bcSJed Brown #include <ceed/backend.h>
13e0582403Sabdelfattah83 #include <magma_v2.h>
14e0582403Sabdelfattah83 
// Thread-count limits, one per dimension (1D, 2D, 3D).
// NOTE(review): presumably supplied as the `maxt` argument of MAGMA_BASIS_NTCOL - confirm at the call sites.
#define MAGMA_MAXTHREADS_1D 128
#define MAGMA_MAXTHREADS_2D 128
#define MAGMA_MAXTHREADS_3D 64
18023b8a51Sabdelfattah83 
// Define macro for determining number of threads in y-direction for basis kernels.
//   x    - divisor (threads needed per column)
//   maxt - thread budget to divide up
// Evaluates to maxt / x (integer division), with a floor of 1 when x exceeds maxt.
// A non-positive x also yields 1 instead of dividing by zero.
// Both arguments are evaluated more than once, so pass side-effect-free expressions only.
#define MAGMA_BASIS_NTCOL(x, maxt) ((((x) <= 0) || ((maxt) < (x))) ? 1 : ((maxt) / (x)))
21023b8a51Sabdelfattah83 
// Define macros for non-tensor kernel instances.
//   MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P / _MAX_Q - upper limits on P and Q for the custom non-tensor kernels
//     (NOTE(review): presumably larger sizes take a different code path - confirm in the basis implementation)
//   MAGMA_NONTENSOR_KERNEL_INSTANCES - number of kernel instances; sizes the per-instance arrays in CeedBasisNonTensor_Magma
//   MAGMA_NONTENSOR_KERNEL_N_VALUES  - comma-separated list of N values, suitable for use as an array initializer;
//     it currently has 7 entries and must keep exactly MAGMA_NONTENSOR_KERNEL_INSTANCES entries
#define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P 40
#define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q 40
#define MAGMA_NONTENSOR_KERNEL_INSTANCES 7
#define MAGMA_NONTENSOR_KERNEL_N_VALUES 1024, 5120, 10240, 51200, 102400, 512000, 1024000
27023b8a51Sabdelfattah83 
28e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP
29c42f38b1Snbeams typedef hipModule_t   CeedMagmaModule;
30c42f38b1Snbeams typedef hipFunction_t CeedMagmaFunction;
31eb7e6cafSJeremy L Thompson #define CeedCompileMagma CeedCompile_Hip
32eb7e6cafSJeremy L Thompson #define CeedGetKernelMagma CeedGetKernel_Hip
33eb7e6cafSJeremy L Thompson #define CeedRunKernelMagma CeedRunKernel_Hip
34eb7e6cafSJeremy L Thompson #define CeedRunKernelDimMagma CeedRunKernelDim_Hip
35eb7e6cafSJeremy L Thompson #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Hip
36f6af633fSnbeams #else
37c42f38b1Snbeams typedef CUmodule   CeedMagmaModule;
38c42f38b1Snbeams typedef CUfunction CeedMagmaFunction;
39eb7e6cafSJeremy L Thompson #define CeedCompileMagma CeedCompile_Cuda
40eb7e6cafSJeremy L Thompson #define CeedGetKernelMagma CeedGetKernel_Cuda
41eb7e6cafSJeremy L Thompson #define CeedRunKernelMagma CeedRunKernel_Cuda
42eb7e6cafSJeremy L Thompson #define CeedRunKernelDimMagma CeedRunKernelDim_Cuda
43eb7e6cafSJeremy L Thompson #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Cuda
44f6af633fSnbeams #endif
45f6af633fSnbeams 
46e0582403Sabdelfattah83 typedef struct {
47c42f38b1Snbeams   CeedMagmaModule   module;
48940a72f1SSebastian Grimberg   CeedMagmaFunction Interp;
49940a72f1SSebastian Grimberg   CeedMagmaFunction InterpTranspose;
50db2becc9SJeremy L Thompson   CeedMagmaFunction InterpTransposeAdd;
51940a72f1SSebastian Grimberg   CeedMagmaFunction Grad;
52940a72f1SSebastian Grimberg   CeedMagmaFunction GradTranspose;
53db2becc9SJeremy L Thompson   CeedMagmaFunction GradTransposeAdd;
54940a72f1SSebastian Grimberg   CeedMagmaFunction Weight;
5538293ee6SJeremy L Thompson   CeedScalar       *d_interp_1d;
5638293ee6SJeremy L Thompson   CeedScalar       *d_grad_1d;
5738293ee6SJeremy L Thompson   CeedScalar       *d_q_weight_1d;
587f5b9731SStan Tomov } CeedBasis_Magma;
597f5b9731SStan Tomov 
607f5b9731SStan Tomov typedef struct {
617251047cSSebastian Grimberg   CeedMagmaModule   module[MAGMA_NONTENSOR_KERNEL_INSTANCES];
62940a72f1SSebastian Grimberg   CeedMagmaFunction Interp[MAGMA_NONTENSOR_KERNEL_INSTANCES];
63940a72f1SSebastian Grimberg   CeedMagmaFunction InterpTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES];
64db2becc9SJeremy L Thompson   CeedMagmaFunction InterpTransposeAdd[MAGMA_NONTENSOR_KERNEL_INSTANCES];
659d15e85bSSebastian Grimberg   CeedMagmaFunction Deriv[MAGMA_NONTENSOR_KERNEL_INSTANCES];
669d15e85bSSebastian Grimberg   CeedMagmaFunction DerivTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES];
67db2becc9SJeremy L Thompson   CeedMagmaFunction DerivTransposeAdd[MAGMA_NONTENSOR_KERNEL_INSTANCES];
68940a72f1SSebastian Grimberg   CeedMagmaFunction Weight;
69940a72f1SSebastian Grimberg   CeedInt           NB_interp[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_interp_t[MAGMA_NONTENSOR_KERNEL_INSTANCES];
709d15e85bSSebastian Grimberg   CeedInt           NB_deriv[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_deriv_t[MAGMA_NONTENSOR_KERNEL_INSTANCES];
7138293ee6SJeremy L Thompson   CeedScalar       *d_interp;
7238293ee6SJeremy L Thompson   CeedScalar       *d_grad;
739d15e85bSSebastian Grimberg   CeedScalar       *d_div;
749d15e85bSSebastian Grimberg   CeedScalar       *d_curl;
7538293ee6SJeremy L Thompson   CeedScalar       *d_q_weight;
76868539c2SNatalie Beams } CeedBasisNonTensor_Magma;
77868539c2SNatalie Beams 
7838293ee6SJeremy L Thompson CEED_INTERN int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
7938293ee6SJeremy L Thompson                                               const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis);
80e4e1133fSSebastian Grimberg CEED_INTERN int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
8138293ee6SJeremy L Thompson                                         const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis);
829d15e85bSSebastian Grimberg CEED_INTERN int CeedBasisCreateHdiv_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
839d15e85bSSebastian Grimberg                                           const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis);
849d15e85bSSebastian Grimberg CEED_INTERN int CeedBasisCreateHcurl_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
859d15e85bSSebastian Grimberg                                            const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis);
86868539c2SNatalie Beams 
87940a72f1SSebastian Grimberg CEED_INTERN magma_int_t magma_isdevptr(const void *);
88940a72f1SSebastian Grimberg 
8958549094SSebastian Grimberg // Comment the line below to use the default magma_is_devptr function
907f5b9731SStan Tomov #define magma_is_devptr magma_isdevptr
917f5b9731SStan Tomov 
9258549094SSebastian Grimberg // If magma and cuda/ref are using the null stream, then ceed_magma_queue_sync should do nothing
93e0582403Sabdelfattah83 #define ceed_magma_queue_sync(...)
94