1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 // magma functions specific to ceed 9 #ifndef CEED_MAGMA_H 10 #define CEED_MAGMA_H 11 12 #include <ceed.h> 13 #include <ceed/backend.h> 14 #include <magma_v2.h> 15 16 #define MAGMA_MAXTHREADS_1D 128 17 #define MAGMA_MAXTHREADS_2D 128 18 #define MAGMA_MAXTHREADS_3D 64 19 #define MAGMA_NONTENSOR_MAXTHREADS (128) 20 21 // Define macro for determining number of threads in y-direction 22 // for basis kernels 23 #define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x))) 24 #define MAGMA_NONTENSOR_BASIS_NTCOL(N) (CeedIntMax(1, (MAGMA_NONTENSOR_MAXTHREADS / (N)))) 25 #define MAGMA_CEILDIV(A, B) (((A) + (B)-1) / (B)) 26 27 #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P (40) 28 #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q (40) 29 30 // Define macro for computing the total threads in a block 31 // for use with __launch_bounds__() 32 #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt)) 33 34 // Define macro for non-tensor kernel instances 35 #define MAGMA_NONTENSOR_KERNEL_INSTANCES (5) 36 #define MAGMA_NONTENSOR_N_VALUES 10240, 51200, 102400, 512000, 1024000 37 38 #ifdef CEED_MAGMA_USE_HIP 39 typedef hipModule_t CeedMagmaModule; 40 typedef hipFunction_t CeedMagmaFunction; 41 #define CeedCompileMagma CeedCompile_Hip 42 #define CeedGetKernelMagma CeedGetKernel_Hip 43 #define CeedRunKernelMagma CeedRunKernel_Hip 44 #define CeedRunKernelDimMagma CeedRunKernelDim_Hip 45 #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Hip 46 #else 47 typedef CUmodule CeedMagmaModule; 48 typedef CUfunction CeedMagmaFunction; 49 #define CeedCompileMagma CeedCompile_Cuda 50 #define CeedGetKernelMagma CeedGetKernel_Cuda 51 #define CeedRunKernelMagma CeedRunKernel_Cuda 52 #define CeedRunKernelDimMagma CeedRunKernelDim_Cuda 53 #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Cuda 54 #endif 55 56 typedef struct { 57 CeedMagmaModule module; 58 CeedMagmaFunction magma_interp; 59 CeedMagmaFunction magma_interp_tr; 60 CeedMagmaFunction magma_grad; 61 CeedMagmaFunction magma_grad_tr; 62 CeedMagmaFunction magma_weight; 63 CeedScalar *d_q_ref_1d; 64 CeedScalar *d_interp_1d; 65 CeedScalar *d_grad_1d; 66 CeedScalar *d_q_weight_1d; 67 } CeedBasis_Magma; 68 69 typedef struct { 70 CeedMagmaModule module[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 71 CeedMagmaFunction magma_interp_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 72 CeedMagmaFunction magma_interp_tr_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 73 CeedMagmaFunction magma_grad_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 74 CeedMagmaFunction magma_grad_tr_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES]; 75 CeedScalar *d_q_ref; 76 CeedScalar *d_interp; 77 CeedScalar *d_grad; 78 CeedScalar *d_q_weight; 79 } CeedBasisNonTensor_Magma; 80 81 CEED_INTERN void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t num_elem, magma_int_t Q, CeedScalar *d_q_weight, 82 CeedScalar *d_v, magma_queue_t queue); 83 84 CEED_INTERN int magma_gemm_nontensor(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha, 85 const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta, 86 CeedScalar *d_C, magma_int_t lddc, magma_queue_t queue); 87 88 CEED_INTERN void gemm_selector(int gpu_arch, char precision, char trans_A, int m, int n, int k, int *n_batch, int *use_magma); 89 90 CEED_INTERN CeedInt nontensor_rtc_get_nb(int gpu_arch, char precision, CeedEvalMode e_mode, CeedTransposeMode t_mode, int P_, int N, int Q_); 91 92 CEED_INTERN magma_int_t magma_isdevptr(const void *A); 93 94 CEED_INTERN int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 95 const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis); 96 97 CEED_INTERN int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_dof, CeedInt num_qpts, const CeedScalar *interp, 98 const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 99 100 // Comment the line below to use the default magma_is_devptr function 101 #define magma_is_devptr magma_isdevptr 102 103 // If magma and cuda/ref are using the null stream, then ceed_magma_queue_sync should do nothing 104 #define ceed_magma_queue_sync(...) 105 106 #endif // CEED_MAGMA_H 107