1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #ifndef _ceed_cuda_h 9 #define _ceed_cuda_h 10 11 #include <ceed.h> 12 #include <ceed/backend.h> 13 #include <ceed/jit-source/cuda/cuda-types.h> 14 #include <cublas_v2.h> 15 #include <cuda.h> 16 17 typedef struct { 18 CeedScalar *h_array; 19 CeedScalar *h_array_borrowed; 20 CeedScalar *h_array_owned; 21 CeedScalar *d_array; 22 CeedScalar *d_array_borrowed; 23 CeedScalar *d_array_owned; 24 } CeedVector_Cuda; 25 26 typedef struct { 27 CUmodule module; 28 CUfunction StridedTranspose; 29 CUfunction StridedNoTranspose; 30 CUfunction OffsetTranspose; 31 CUfunction OffsetNoTranspose; 32 CeedInt num_nodes; 33 CeedInt *h_ind; 34 CeedInt *h_ind_allocated; 35 CeedInt *d_ind; 36 CeedInt *d_ind_allocated; 37 CeedInt *d_t_offsets; 38 CeedInt *d_t_indices; 39 CeedInt *d_l_vec_indices; 40 } CeedElemRestriction_Cuda; 41 42 typedef struct { 43 CUmodule module; 44 CUfunction Interp; 45 CUfunction Grad; 46 CUfunction Weight; 47 CeedScalar *d_interp_1d; 48 CeedScalar *d_grad_1d; 49 CeedScalar *d_q_weight_1d; 50 } CeedBasis_Cuda; 51 52 typedef struct { 53 CUmodule module; 54 CUfunction Interp; 55 CUfunction Grad; 56 CUfunction Weight; 57 CeedScalar *d_interp; 58 CeedScalar *d_grad; 59 CeedScalar *d_q_weight; 60 } CeedBasisNonTensor_Cuda; 61 62 typedef struct { 63 CUmodule module; 64 char *qfunction_name; 65 char *qfunction_source; 66 CUfunction QFunction; 67 Fields_Cuda fields; 68 void *d_c; 69 } CeedQFunction_Cuda; 70 71 typedef struct { 72 void *h_data; 73 void *h_data_borrowed; 74 void *h_data_owned; 75 void *d_data; 76 void *d_data_borrowed; 77 void *d_data_owned; 78 } CeedQFunctionContext_Cuda; 79 80 typedef struct { 81 CUmodule module; 82 CUfunction linearDiagonal; 83 CUfunction linearPointBlock; 84 CeedBasis basisin, basisout; 85 CeedElemRestriction diagrstr, pbdiagrstr; 86 CeedVector elemdiag, pbelemdiag; 87 CeedInt numemodein, numemodeout, nnodes; 88 CeedEvalMode *h_emodein, *h_emodeout; 89 CeedEvalMode *d_emodein, *d_emodeout; 90 CeedScalar *d_identity, *d_interpin, *d_interpout, *d_gradin, *d_gradout; 91 } CeedOperatorDiag_Cuda; 92 93 typedef struct { 94 CUmodule module; 95 CUfunction linearAssemble; 96 CeedInt nelem, block_size_x, block_size_y, elemsPerBlock; 97 CeedScalar *d_B_in, *d_B_out; 98 } CeedOperatorAssemble_Cuda; 99 100 typedef struct { 101 CeedVector *evecs; // E-vectors, inputs followed by outputs 102 CeedVector *qvecsin; // Input Q-vectors needed to apply operator 103 CeedVector *qvecsout; // Output Q-vectors needed to apply operator 104 CeedInt numein; 105 CeedInt numeout; 106 CeedInt qfnumactivein, qfnumactiveout; 107 CeedVector *qfactivein; 108 CeedOperatorDiag_Cuda *diag; 109 CeedOperatorAssemble_Cuda *asmb; 110 } CeedOperator_Cuda; 111 112 CEED_INTERN int CeedGetCublasHandle_Cuda(Ceed ceed, cublasHandle_t *handle); 113 114 CEED_INTERN int CeedVectorCreate_Cuda(CeedSize n, CeedVector vec); 115 116 CEED_INTERN int CeedElemRestrictionCreate_Cuda(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *indices, CeedElemRestriction r); 117 118 CEED_INTERN int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 119 const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis); 120 121 CEED_INTERN int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 122 const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 123 124 CEED_INTERN int CeedQFunctionCreate_Cuda(CeedQFunction qf); 125 126 CEED_INTERN int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx); 127 128 CEED_INTERN int CeedOperatorCreate_Cuda(CeedOperator op); 129 130 #endif 131