xref: /libCEED/backends/magma/ceed-magma.h (revision edb2538e3dd6743c029967fc4e89c6fcafedb8c2)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 // magma functions specific to ceed
9 #ifndef CEED_MAGMA_H
10 #define CEED_MAGMA_H
11 
12 #include <ceed.h>
13 #include <ceed/backend.h>
14 #include <magma_v2.h>
15 
16 #define MAGMA_MAXTHREADS_1D 128
17 #define MAGMA_MAXTHREADS_2D 128
18 #define MAGMA_MAXTHREADS_3D 64
19 #define MAGMA_NONTENSOR_MAXTHREADS (128)
20 
21 // Define macro for determining number of threads in y-direction
22 // for basis kernels
23 #define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x)))
24 #define MAGMA_NONTENSOR_BASIS_NTCOL(N) (CeedIntMax(1, (MAGMA_NONTENSOR_MAXTHREADS / (N))))
25 #define MAGMA_CEILDIV(A, B) (((A) + (B)-1) / (B))
26 
27 #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P (40)
28 #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q (40)
29 
30 // Define macro for computing the total threads in a block
31 // for use with __launch_bounds__()
32 #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt))
33 
34 // Define macro for non-tensor kernel instances
35 #define MAGMA_NONTENSOR_KERNEL_INSTANCES (5)
36 #define MAGMA_NONTENSOR_N_VALUES 10240, 51200, 102400, 512000, 1024000
37 
38 #ifdef CEED_MAGMA_USE_HIP
39 typedef hipModule_t   CeedMagmaModule;
40 typedef hipFunction_t CeedMagmaFunction;
41 #define CeedCompileMagma CeedCompile_Hip
42 #define CeedGetKernelMagma CeedGetKernel_Hip
43 #define CeedRunKernelMagma CeedRunKernel_Hip
44 #define CeedRunKernelDimMagma CeedRunKernelDim_Hip
45 #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Hip
46 #else
47 typedef CUmodule   CeedMagmaModule;
48 typedef CUfunction CeedMagmaFunction;
49 #define CeedCompileMagma CeedCompile_Cuda
50 #define CeedGetKernelMagma CeedGetKernel_Cuda
51 #define CeedRunKernelMagma CeedRunKernel_Cuda
52 #define CeedRunKernelDimMagma CeedRunKernelDim_Cuda
53 #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Cuda
54 #endif
55 
56 typedef struct {
57   CeedMagmaModule   module;
58   CeedMagmaFunction magma_interp;
59   CeedMagmaFunction magma_interp_tr;
60   CeedMagmaFunction magma_grad;
61   CeedMagmaFunction magma_grad_tr;
62   CeedMagmaFunction magma_weight;
63   CeedScalar       *d_q_ref_1d;
64   CeedScalar       *d_interp_1d;
65   CeedScalar       *d_grad_1d;
66   CeedScalar       *d_q_weight_1d;
67 } CeedBasis_Magma;
68 
69 typedef struct {
70   CeedMagmaModule   module[MAGMA_NONTENSOR_KERNEL_INSTANCES];
71   CeedMagmaFunction magma_interp_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
72   CeedMagmaFunction magma_interp_tr_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
73   CeedMagmaFunction magma_grad_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
74   CeedMagmaFunction magma_grad_tr_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
75   CeedScalar       *d_q_ref;
76   CeedScalar       *d_interp;
77   CeedScalar       *d_grad;
78   CeedScalar       *d_q_weight;
79 } CeedBasisNonTensor_Magma;
80 
81 CEED_INTERN void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t num_elem, magma_int_t Q, CeedScalar *d_q_weight,
82                                         CeedScalar *d_v, magma_queue_t queue);
83 
84 CEED_INTERN int magma_gemm_nontensor(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha,
85                                      const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta,
86                                      CeedScalar *d_C, magma_int_t lddc, magma_queue_t queue);
87 
88 CEED_INTERN void gemm_selector(int gpu_arch, char precision, char trans_A, int m, int n, int k, int *n_batch, int *use_magma);
89 
90 CEED_INTERN CeedInt nontensor_rtc_get_nb(int gpu_arch, char precision, CeedEvalMode e_mode, CeedTransposeMode t_mode, int P_, int N, int Q_);
91 
92 CEED_INTERN magma_int_t magma_isdevptr(const void *A);
93 
94 CEED_INTERN int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
95                                               const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis);
96 
97 CEED_INTERN int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_dof, CeedInt num_qpts, const CeedScalar *interp,
98                                         const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis);
99 
100 // Comment the line below to use the default magma_is_devptr function
101 #define magma_is_devptr magma_isdevptr
102 
103 // If magma and cuda/ref are using the null stream, then ceed_magma_queue_sync should do nothing
104 #define ceed_magma_queue_sync(...)
105 
106 #endif  // CEED_MAGMA_H
107