1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #ifndef _ceed_cuda_h 9 #define _ceed_cuda_h 10 11 #include <ceed/ceed.h> 12 #include <ceed/backend.h> 13 #include <cuda.h> 14 #include "../cuda/ceed-cuda-common.h" 15 16 typedef struct { 17 CeedScalar *h_array; 18 CeedScalar *h_array_borrowed; 19 CeedScalar *h_array_owned; 20 CeedScalar *d_array; 21 CeedScalar *d_array_borrowed; 22 CeedScalar *d_array_owned; 23 } CeedVector_Cuda; 24 25 typedef struct { 26 CUmodule module; 27 CUfunction StridedTranspose; 28 CUfunction StridedNoTranspose; 29 CUfunction OffsetTranspose; 30 CUfunction OffsetNoTranspose; 31 CeedInt num_nodes; 32 CeedInt *h_ind; 33 CeedInt *h_ind_allocated; 34 CeedInt *d_ind; 35 CeedInt *d_ind_allocated; 36 CeedInt *d_t_offsets; 37 CeedInt *d_t_indices; 38 CeedInt *d_l_vec_indices; 39 } CeedElemRestriction_Cuda; 40 41 typedef struct { 42 CUmodule module; 43 CUfunction Interp; 44 CUfunction Grad; 45 CUfunction Weight; 46 CeedScalar *d_interp_1d; 47 CeedScalar *d_grad_1d; 48 CeedScalar *d_q_weight_1d; 49 } CeedBasis_Cuda; 50 51 typedef struct { 52 CUmodule module; 53 CUfunction Interp; 54 CUfunction Grad; 55 CUfunction Weight; 56 CeedScalar *d_interp; 57 CeedScalar *d_grad; 58 CeedScalar *d_q_weight; 59 } CeedBasisNonTensor_Cuda; 60 61 // We use a struct to avoid having to memCpy the array of pointers 62 // __global__ copies by value the struct. 63 typedef struct { 64 const CeedScalar *inputs[CEED_FIELD_MAX]; 65 CeedScalar *outputs[CEED_FIELD_MAX]; 66 } Fields_Cuda; 67 68 typedef struct { 69 CUmodule module; 70 char *qfunction_name; 71 char *qfunction_source; 72 CUfunction QFunction; 73 Fields_Cuda fields; 74 void *d_c; 75 } CeedQFunction_Cuda; 76 77 typedef struct { 78 void *h_data; 79 void *h_data_borrowed; 80 void *h_data_owned; 81 void *d_data; 82 void *d_data_borrowed; 83 void *d_data_owned; 84 } CeedQFunctionContext_Cuda; 85 86 typedef struct { 87 CUmodule module; 88 CUfunction linearDiagonal; 89 CUfunction linearPointBlock; 90 CeedBasis basisin, basisout; 91 CeedElemRestriction diagrstr, pbdiagrstr; 92 CeedVector elemdiag, pbelemdiag; 93 CeedInt numemodein, numemodeout, nnodes; 94 CeedEvalMode *h_emodein, *h_emodeout; 95 CeedEvalMode *d_emodein, *d_emodeout; 96 CeedScalar *d_identity, *d_interpin, *d_interpout, *d_gradin, *d_gradout; 97 } CeedOperatorDiag_Cuda; 98 99 typedef struct { 100 CUmodule module; 101 CUfunction linearAssemble; 102 CeedInt nelem, block_size_x, block_size_y, elemsPerBlock; 103 CeedScalar *d_B_in, *d_B_out; 104 } CeedOperatorAssemble_Cuda; 105 106 typedef struct { 107 CeedVector *evecs; // E-vectors, inputs followed by outputs 108 CeedVector *qvecsin; // Input Q-vectors needed to apply operator 109 CeedVector *qvecsout; // Output Q-vectors needed to apply operator 110 CeedInt numein; 111 CeedInt numeout; 112 CeedInt qfnumactivein, qfnumactiveout; 113 CeedVector *qfactivein; 114 CeedOperatorDiag_Cuda *diag; 115 CeedOperatorAssemble_Cuda *asmb; 116 } CeedOperator_Cuda; 117 118 CEED_INTERN int CeedCudaGetCublasHandle(Ceed ceed, cublasHandle_t *handle); 119 120 CEED_INTERN int CeedVectorCreate_Cuda(CeedSize n, CeedVector vec); 121 122 CEED_INTERN int CeedElemRestrictionCreate_Cuda(CeedMemType mem_type, 123 CeedCopyMode copy_mode, const CeedInt *indices, CeedElemRestriction r); 124 125 CEED_INTERN int CeedElemRestrictionCreateBlocked_Cuda(const CeedMemType 126 mem_type, 127 const CeedCopyMode copy_mode, const CeedInt *indices, 128 const CeedElemRestriction res); 129 130 CEED_INTERN int CeedBasisApplyElems_Cuda(CeedBasis basis, 131 const CeedInt num_elem, 132 CeedTransposeMode t_mode, CeedEvalMode eval_mode, const CeedVector u, 133 CeedVector v); 134 135 CEED_INTERN int CeedQFunctionApplyElems_Cuda(CeedQFunction qf, const CeedInt Q, 136 const CeedVector *const u, const CeedVector *v); 137 138 CEED_INTERN int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, 139 CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 140 const CeedScalar *qref_1d, const CeedScalar *qweight_1d, CeedBasis basis); 141 142 CEED_INTERN int CeedBasisCreateH1_Cuda(CeedElemTopology, CeedInt, CeedInt, 143 CeedInt, const CeedScalar *, 144 const CeedScalar *, const CeedScalar *, 145 const CeedScalar *, CeedBasis); 146 147 CEED_INTERN int CeedQFunctionCreate_Cuda(CeedQFunction qf); 148 149 CEED_INTERN int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx); 150 151 CEED_INTERN int CeedOperatorCreate_Cuda(CeedOperator op); 152 153 #endif 154