xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision 00fb7a044a7fd1c8bfdb0605078b0c7ba7a4ad58)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 // magma functions specific to ceed
9 #ifndef _ceed_magma_h
10 #define _ceed_magma_h
11 
12 #include <ceed.h>
13 #include <ceed/backend.h>
14 #include <magma_v2.h>
15 
16 #define MAGMA_MAXTHREADS_1D 128
17 #define MAGMA_MAXTHREADS_2D 128
18 #define MAGMA_MAXTHREADS_3D 64
19 #define MAGMA_NONTENSOR_MAXTHREADS (128)
20 
21 // Define macro for determining number of threads in y-direction
22 // for basis kernels
23 #define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x)))
24 #define MAGMA_NONTENSOR_BASIS_NTCOL(N) (CeedIntMax(1, (MAGMA_NONTENSOR_MAXTHREADS / (N))))
25 #define MAGMA_CEILDIV(A, B) (((A) + (B)-1) / (B))
26 
27 #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P (40)
28 #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q (40)
29 
30 // Define macro for computing the total threads in a block
31 // for use with __launch_bounds__()
32 #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt))
33 
34 // Define macro for non-tensor kernel instances
35 #define MAGMA_NONTENSOR_KERNEL_INSTANCES (5)
36 #define MAGMA_NONTENSOR_N_VALUES 10240, 51200, 102400, 512000, 1024000
37 
38 #ifdef CEED_MAGMA_USE_HIP
39 typedef hipModule_t   CeedMagmaModule;
40 typedef hipFunction_t CeedMagmaFunction;
41 #define CeedCompileMagma CeedCompile_Hip
42 #define CeedGetKernelMagma CeedGetKernel_Hip
43 #define CeedRunKernelMagma CeedRunKernel_Hip
44 #define CeedRunKernelDimMagma CeedRunKernelDim_Hip
45 #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Hip
46 #else
47 typedef CUmodule   CeedMagmaModule;
48 typedef CUfunction CeedMagmaFunction;
49 #define CeedCompileMagma CeedCompile_Cuda
50 #define CeedGetKernelMagma CeedGetKernel_Cuda
51 #define CeedRunKernelMagma CeedRunKernel_Cuda
52 #define CeedRunKernelDimMagma CeedRunKernelDim_Cuda
53 #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Cuda
54 #endif
55 
56 typedef struct {
57   CeedMagmaModule   module;
58   CeedMagmaFunction magma_interp;
59   CeedMagmaFunction magma_interp_tr;
60   CeedMagmaFunction magma_grad;
61   CeedMagmaFunction magma_grad_tr;
62   CeedMagmaFunction magma_weight;
63   CeedScalar       *dqref1d;
64   CeedScalar       *dinterp1d;
65   CeedScalar       *dgrad1d;
66   CeedScalar       *dqweight1d;
67 } CeedBasis_Magma;
68 
69 typedef struct {
70   CeedMagmaModule   module[MAGMA_NONTENSOR_KERNEL_INSTANCES];
71   CeedMagmaFunction magma_interp_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
72   CeedMagmaFunction magma_interp_tr_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
73   CeedMagmaFunction magma_grad_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
74   CeedMagmaFunction magma_grad_tr_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
75   CeedScalar       *dqref;
76   CeedScalar       *dinterp;
77   CeedScalar       *dgrad;
78   CeedScalar       *dqweight;
79 } CeedBasisNonTensor_Magma;
80 
81 CEED_INTERN void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem, magma_int_t Q, CeedScalar *dqweight, CeedScalar *dv,
82                                         magma_queue_t queue);
83 
84 CEED_INTERN int magma_gemm_nontensor(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha,
85                                      const CeedScalar *dA, magma_int_t ldda, const CeedScalar *dB, magma_int_t lddb, CeedScalar beta, CeedScalar *dC,
86                                      magma_int_t lddc, magma_queue_t queue);
87 
88 CEED_INTERN void gemm_selector(int gpu_arch, char precision, char transA, int m, int n, int k, int *nbatch, int *use_magma);
89 
90 CEED_INTERN CeedInt nontensor_rtc_get_nb(int gpu_arch, char precision, CeedEvalMode emode, CeedTransposeMode tmode, int P_, int N, int Q_);
91 
92 CEED_INTERN magma_int_t magma_isdevptr(const void *A);
93 
94 CEED_INTERN int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, const CeedScalar *interp1d, const CeedScalar *grad1d,
95                                               const CeedScalar *qref1d, const CeedScalar *qweight1d, CeedBasis basis);
96 
97 CEED_INTERN int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof, CeedInt nqpts, const CeedScalar *interp,
98                                         const CeedScalar *grad, const CeedScalar *qref, const CeedScalar *qweight, CeedBasis basis);
99 
100 // Comment the line below to use the default magma_is_devptr function
101 #define magma_is_devptr magma_isdevptr
102 
103 // If magma and cuda/ref are using the null stream, then ceed_magma_queue_sync should do nothing
104 #define ceed_magma_queue_sync(...)
105 
106 #endif  // _ceed_magma_h
107