xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision 2dc3fb5f4d99263629ede9783b5752ff8ee2177f)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 // magma functions specific to ceed
9 #ifndef _ceed_magma_h
10 #define _ceed_magma_h
11 
12 #include <ceed/ceed.h>
13 #include <ceed/backend.h>
14 #include <magma_v2.h>
15 
16 #define MAGMA_MAXTHREADS_1D 128
17 #define MAGMA_MAXTHREADS_2D 128
18 #define MAGMA_MAXTHREADS_3D 64
19 // Define macro for determining number of threads in y-direction
20 // for basis kernels
21 #define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x)))
22 // Define macro for computing the total threads in a block
23 // for use with __launch_bounds__()
24 #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt))
25 
26 #ifdef CEED_MAGMA_USE_HIP
27 typedef hipModule_t CeedMagmaModule;
28 typedef hipFunction_t CeedMagmaFunction;
29 #define CeedCompileMagma CeedCompileHip
30 #define CeedGetKernelMagma CeedGetKernelHip
31 #define CeedRunKernelMagma CeedRunKernelHip
32 #define CeedRunKernelDimMagma CeedRunKernelDimHip
33 #define CeedRunKernelDimSharedMagma CeedRunKernelDimSharedHip
34 #else
35 typedef CUmodule CeedMagmaModule;
36 typedef CUfunction CeedMagmaFunction;
37 #define CeedCompileMagma CeedCompileCuda
38 #define CeedGetKernelMagma CeedGetKernelCuda
39 #define CeedRunKernelMagma CeedRunKernelCuda
40 #define CeedRunKernelDimMagma CeedRunKernelDimCuda
41 #define CeedRunKernelDimSharedMagma CeedRunKernelDimSharedCuda
42 #endif
43 
44 typedef enum {
45   MAGMA_KERNEL_DIM_GENERIC=101,
46   MAGMA_KERNEL_DIM_SPECIFIC=102
47 } magma_kernel_mode_t;
48 
49 typedef struct {
50   magma_kernel_mode_t basis_kernel_mode;
51   magma_device_t device;
52   magma_queue_t queue;
53 } Ceed_Magma;
54 
55 typedef struct {
56   CeedMagmaModule module;
57   CeedMagmaFunction magma_interp;
58   CeedMagmaFunction magma_interp_tr;
59   CeedMagmaFunction magma_grad;
60   CeedMagmaFunction magma_grad_tr;
61   CeedMagmaFunction magma_weight;
62   CeedScalar *dqref1d;
63   CeedScalar *dinterp1d;
64   CeedScalar *dgrad1d;
65   CeedScalar *dqweight1d;
66 } CeedBasis_Magma;
67 
68 typedef struct {
69   CeedScalar *dqref;
70   CeedScalar *dinterp;
71   CeedScalar *dgrad;
72   CeedScalar *dqweight;
73 } CeedBasisNonTensor_Magma;
74 
75 typedef enum {
76   OWNED_NONE = 0,
77   OWNED_UNPINNED,
78   OWNED_PINNED,
79 } OwnershipMode;
80 
81 typedef struct {
82   CeedMagmaModule module;
83   CeedMagmaFunction StridedTranspose;
84   CeedMagmaFunction StridedNoTranspose;
85   CeedMagmaFunction OffsetTranspose;
86   CeedMagmaFunction OffsetNoTranspose;
87   CeedInt *offsets;
88   CeedInt *doffsets;
89   OwnershipMode own_;
90   int down_;            // cover a case where we own Device memory
91 } CeedElemRestriction_Magma;
92 
93 typedef struct {
94   const CeedScalar **inputs;
95   CeedScalar **outputs;
96   bool setupdone;
97 } CeedQFunction_Magma;
98 
99 #define USE_MAGMA_BATCH
100 #define USE_MAGMA_BATCH2
101 #define USE_MAGMA_BATCH3
102 #define USE_MAGMA_BATCH4
103 
104 CEED_INTERN  void
105 magma_weight_nontensor(
106   magma_int_t grid, magma_int_t threads, magma_int_t nelem,
107   magma_int_t Q,
108   CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue);
109 
110 CEED_INTERN  int
111 magma_dgemm_nontensor(
112   magma_trans_t transA, magma_trans_t transB,
113   magma_int_t m, magma_int_t n, magma_int_t k,
114   double alpha, const double *dA, magma_int_t ldda,
115   const double *dB, magma_int_t lddb,
116   double beta,  double *dC, magma_int_t lddc,
117   magma_queue_t queue );
118 
119 CEED_INTERN  int
120 magma_sgemm_nontensor(
121   magma_trans_t transA, magma_trans_t transB,
122   magma_int_t m, magma_int_t n, magma_int_t k,
123   float alpha, const float *dA, magma_int_t ldda,
124   const float *dB, magma_int_t lddb,
125   float beta,  float *dC, magma_int_t lddc,
126   magma_queue_t queue );
127 
128 CEED_INTERN  void
129 gemm_selector(
130   int gpu_arch,
131   char precision, char transA,
132   int m, int n, int k,
133   int *nbatch, int *use_magma );
134 
135 CEED_INTERN  magma_int_t
136 magma_isdevptr(const void *A);
137 
138 CEED_INTERN  int
139 CeedBasisCreateTensorH1_Magma(
140   CeedInt dim, CeedInt P1d,
141   CeedInt Q1d,
142   const CeedScalar *interp1d,
143   const CeedScalar *grad1d,
144   const CeedScalar *qref1d,
145   const CeedScalar *qweight1d,
146   CeedBasis basis);
147 
148 CEED_INTERN  int
149 CeedBasisCreateH1_Magma(
150   CeedElemTopology topo, CeedInt dim,
151   CeedInt ndof, CeedInt nqpts,
152   const CeedScalar *interp,
153   const CeedScalar *grad,
154   const CeedScalar *qref,
155   const CeedScalar *qweight,
156   CeedBasis basis);
157 
158 CEED_INTERN  int
159 CeedElemRestrictionCreate_Magma(
160   CeedMemType mtype,
161   CeedCopyMode cmode,
162   const CeedInt *offsets,
163   CeedElemRestriction r);
164 
165 CEED_INTERN  int
166 CeedElemRestrictionCreateBlocked_Magma(
167   const CeedMemType mtype,
168   const CeedCopyMode cmode,
169   const CeedInt *offsets,
170   const CeedElemRestriction res);
171 
172 CEED_INTERN  int CeedOperatorCreate_Magma(CeedOperator op);
173 
174 // comment the line below to use the default magma_is_devptr function
175 #define magma_is_devptr magma_isdevptr
176 
177 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
178 // should do nothing
179 #define ceed_magma_queue_sync(...)
180 
181 // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value>
182 #ifndef MAGMA_BATCH_STRIDE
183 #define MAGMA_BATCH_STRIDE (1000)
184 #endif
185 
186 #endif  // _ceed_magma_h
187