xref: /libCEED/backends/cuda-ref/ceed-cuda-ref.h (revision 0305e208bc4684eaa3113a3ca8b7ffdd1d4cb7a9)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
30d0321e0SJeremy L Thompson //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
50d0321e0SJeremy L Thompson //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
70d0321e0SJeremy L Thompson 
80d0321e0SJeremy L Thompson #ifndef _ceed_cuda_h
90d0321e0SJeremy L Thompson #define _ceed_cuda_h
100d0321e0SJeremy L Thompson 
1149aac155SJeremy L Thompson #include <ceed.h>
120d0321e0SJeremy L Thompson #include <ceed/backend.h>
1349aac155SJeremy L Thompson #include <ceed/jit-source/cuda/cuda-types.h>
1449aac155SJeremy L Thompson #include <cublas_v2.h>
150d0321e0SJeremy L Thompson #include <cuda.h>
162b730f8bSJeremy L Thompson 
170d0321e0SJeremy L Thompson typedef struct {
180d0321e0SJeremy L Thompson   CeedScalar *h_array;
190d0321e0SJeremy L Thompson   CeedScalar *h_array_borrowed;
200d0321e0SJeremy L Thompson   CeedScalar *h_array_owned;
210d0321e0SJeremy L Thompson   CeedScalar *d_array;
220d0321e0SJeremy L Thompson   CeedScalar *d_array_borrowed;
230d0321e0SJeremy L Thompson   CeedScalar *d_array_owned;
240d0321e0SJeremy L Thompson } CeedVector_Cuda;
250d0321e0SJeremy L Thompson 
260d0321e0SJeremy L Thompson typedef struct {
270d0321e0SJeremy L Thompson   CUmodule   module;
28437930d1SJeremy L Thompson   CUfunction StridedTranspose;
29437930d1SJeremy L Thompson   CUfunction StridedNoTranspose;
30437930d1SJeremy L Thompson   CUfunction OffsetTranspose;
31437930d1SJeremy L Thompson   CUfunction OffsetNoTranspose;
32437930d1SJeremy L Thompson   CeedInt    num_nodes;
330d0321e0SJeremy L Thompson   CeedInt   *h_ind;
340d0321e0SJeremy L Thompson   CeedInt   *h_ind_allocated;
350d0321e0SJeremy L Thompson   CeedInt   *d_ind;
360d0321e0SJeremy L Thompson   CeedInt   *d_ind_allocated;
37437930d1SJeremy L Thompson   CeedInt   *d_t_offsets;
38437930d1SJeremy L Thompson   CeedInt   *d_t_indices;
39437930d1SJeremy L Thompson   CeedInt   *d_l_vec_indices;
400d0321e0SJeremy L Thompson } CeedElemRestriction_Cuda;
410d0321e0SJeremy L Thompson 
42437930d1SJeremy L Thompson typedef struct {
43437930d1SJeremy L Thompson   CUmodule    module;
44437930d1SJeremy L Thompson   CUfunction  Interp;
45437930d1SJeremy L Thompson   CUfunction  Grad;
46437930d1SJeremy L Thompson   CUfunction  Weight;
47437930d1SJeremy L Thompson   CeedScalar *d_interp_1d;
48437930d1SJeremy L Thompson   CeedScalar *d_grad_1d;
49437930d1SJeremy L Thompson   CeedScalar *d_q_weight_1d;
50437930d1SJeremy L Thompson } CeedBasis_Cuda;
51437930d1SJeremy L Thompson 
52437930d1SJeremy L Thompson typedef struct {
53437930d1SJeremy L Thompson   CUmodule    module;
54437930d1SJeremy L Thompson   CUfunction  Interp;
55437930d1SJeremy L Thompson   CUfunction  Grad;
56437930d1SJeremy L Thompson   CUfunction  Weight;
57437930d1SJeremy L Thompson   CeedScalar *d_interp;
58437930d1SJeremy L Thompson   CeedScalar *d_grad;
59437930d1SJeremy L Thompson   CeedScalar *d_q_weight;
60437930d1SJeremy L Thompson } CeedBasisNonTensor_Cuda;
61437930d1SJeremy L Thompson 
620d0321e0SJeremy L Thompson typedef struct {
630d0321e0SJeremy L Thompson   CUmodule    module;
64437930d1SJeremy L Thompson   char       *qfunction_name;
65437930d1SJeremy L Thompson   char       *qfunction_source;
66437930d1SJeremy L Thompson   CUfunction  QFunction;
670d0321e0SJeremy L Thompson   Fields_Cuda fields;
680d0321e0SJeremy L Thompson   void       *d_c;
690d0321e0SJeremy L Thompson } CeedQFunction_Cuda;
700d0321e0SJeremy L Thompson 
// Data struct named for the CUDA side of a CeedQFunctionContext: six untyped pointers in two groups of three.
// NOTE(review): the h_/d_ prefixes presumably mean host/device and the _borrowed/_owned suffixes
// presumably track who owns each buffer -- confirm against the context implementation.
typedef struct {
  void *h_data, *h_data_borrowed, *h_data_owned;
  void *d_data, *d_data_borrowed, *d_data_owned;
} CeedQFunctionContext_Cuda;
790d0321e0SJeremy L Thompson 
800d0321e0SJeremy L Thompson typedef struct {
810d0321e0SJeremy L Thompson   CUmodule            module;
820d0321e0SJeremy L Thompson   CUfunction          linearDiagonal;
830d0321e0SJeremy L Thompson   CUfunction          linearPointBlock;
840d0321e0SJeremy L Thompson   CeedBasis           basisin, basisout;
850d0321e0SJeremy L Thompson   CeedElemRestriction diagrstr, pbdiagrstr;
860d0321e0SJeremy L Thompson   CeedVector          elemdiag, pbelemdiag;
870d0321e0SJeremy L Thompson   CeedInt             numemodein, numemodeout, nnodes;
880d0321e0SJeremy L Thompson   CeedEvalMode       *h_emodein, *h_emodeout;
890d0321e0SJeremy L Thompson   CeedEvalMode       *d_emodein, *d_emodeout;
900d0321e0SJeremy L Thompson   CeedScalar         *d_identity, *d_interpin, *d_interpout, *d_gradin, *d_gradout;
910d0321e0SJeremy L Thompson } CeedOperatorDiag_Cuda;
920d0321e0SJeremy L Thompson 
930d0321e0SJeremy L Thompson typedef struct {
94cc132f9aSnbeams   CUmodule    module;
95cc132f9aSnbeams   CUfunction  linearAssemble;
9659ad764aSnbeams   CeedInt     nelem, block_size_x, block_size_y, elemsPerBlock;
97cc132f9aSnbeams   CeedScalar *d_B_in, *d_B_out;
98cc132f9aSnbeams } CeedOperatorAssemble_Cuda;
99cc132f9aSnbeams 
100cc132f9aSnbeams typedef struct {
1010d0321e0SJeremy L Thompson   CeedVector                *evecs;     // E-vectors, inputs followed by outputs
1020d0321e0SJeremy L Thompson   CeedVector                *qvecsin;   // Input Q-vectors needed to apply operator
1030d0321e0SJeremy L Thompson   CeedVector                *qvecsout;  // Output Q-vectors needed to apply operator
1040d0321e0SJeremy L Thompson   CeedInt                    numein;
1050d0321e0SJeremy L Thompson   CeedInt                    numeout;
1060d0321e0SJeremy L Thompson   CeedInt                    qfnumactivein, qfnumactiveout;
1070d0321e0SJeremy L Thompson   CeedVector                *qfactivein;
1080d0321e0SJeremy L Thompson   CeedOperatorDiag_Cuda     *diag;
109cc132f9aSnbeams   CeedOperatorAssemble_Cuda *asmb;
1100d0321e0SJeremy L Thompson } CeedOperator_Cuda;
1110d0321e0SJeremy L Thompson 
112eb7e6cafSJeremy L Thompson CEED_INTERN int CeedGetCublasHandle_Cuda(Ceed ceed, cublasHandle_t *handle);
1130d0321e0SJeremy L Thompson 
1141f9221feSJeremy L Thompson CEED_INTERN int CeedVectorCreate_Cuda(CeedSize n, CeedVector vec);
1150d0321e0SJeremy L Thompson 
116*0305e208SSebastian Grimberg CEED_INTERN int CeedElemRestrictionCreate_Cuda(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *indices, const bool *orients,
117*0305e208SSebastian Grimberg                                                const CeedInt *curl_orients, CeedElemRestriction r);
1180d0321e0SJeremy L Thompson 
1192b730f8bSJeremy L Thompson CEED_INTERN int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
1206574a04fSJeremy L Thompson                                              const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis);
1210d0321e0SJeremy L Thompson 
12251475c7cSJeremy L Thompson CEED_INTERN int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
12351475c7cSJeremy L Thompson                                        const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis);
1240d0321e0SJeremy L Thompson 
1250d0321e0SJeremy L Thompson CEED_INTERN int CeedQFunctionCreate_Cuda(CeedQFunction qf);
1260d0321e0SJeremy L Thompson 
1270d0321e0SJeremy L Thompson CEED_INTERN int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx);
1280d0321e0SJeremy L Thompson 
1290d0321e0SJeremy L Thompson CEED_INTERN int CeedOperatorCreate_Cuda(CeedOperator op);
1300d0321e0SJeremy L Thompson 
1310d0321e0SJeremy L Thompson #endif
132