1*0d0321e0SJeremy L Thompson // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2*0d0321e0SJeremy L Thompson // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3*0d0321e0SJeremy L Thompson // All Rights reserved. See files LICENSE and NOTICE for details. 4*0d0321e0SJeremy L Thompson // 5*0d0321e0SJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software 6*0d0321e0SJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral 7*0d0321e0SJeremy L Thompson // element discretizations for exascale applications. For more information and 8*0d0321e0SJeremy L Thompson // source code availability see http://github.com/ceed. 9*0d0321e0SJeremy L Thompson // 10*0d0321e0SJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11*0d0321e0SJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office 12*0d0321e0SJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for 13*0d0321e0SJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including 14*0d0321e0SJeremy L Thompson // software, applications, hardware, advanced system engineering and early 15*0d0321e0SJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative. 16*0d0321e0SJeremy L Thompson 17*0d0321e0SJeremy L Thompson #ifndef _ceed_cuda_h 18*0d0321e0SJeremy L Thompson #define _ceed_cuda_h 19*0d0321e0SJeremy L Thompson 20*0d0321e0SJeremy L Thompson #include <ceed/ceed.h> 21*0d0321e0SJeremy L Thompson #include <ceed/backend.h> 22*0d0321e0SJeremy L Thompson #include <cuda.h> 23*0d0321e0SJeremy L Thompson #include "../cuda/ceed-cuda-common.h" 24*0d0321e0SJeremy L Thompson 25*0d0321e0SJeremy L Thompson typedef struct { 26*0d0321e0SJeremy L Thompson CeedScalar *h_array; 27*0d0321e0SJeremy L Thompson CeedScalar *h_array_borrowed; 28*0d0321e0SJeremy L Thompson CeedScalar *h_array_owned; 29*0d0321e0SJeremy L Thompson CeedScalar *d_array; 30*0d0321e0SJeremy L Thompson CeedScalar *d_array_borrowed; 31*0d0321e0SJeremy L Thompson CeedScalar *d_array_owned; 32*0d0321e0SJeremy L Thompson } CeedVector_Cuda; 33*0d0321e0SJeremy L Thompson 34*0d0321e0SJeremy L Thompson typedef struct { 35*0d0321e0SJeremy L Thompson CUmodule module; 36*0d0321e0SJeremy L Thompson CUfunction noTrStrided; 37*0d0321e0SJeremy L Thompson CUfunction noTrOffset; 38*0d0321e0SJeremy L Thompson CUfunction trStrided; 39*0d0321e0SJeremy L Thompson CUfunction trOffset; 40*0d0321e0SJeremy L Thompson CeedInt nnodes; 41*0d0321e0SJeremy L Thompson CeedInt *h_ind; 42*0d0321e0SJeremy L Thompson CeedInt *h_ind_allocated; 43*0d0321e0SJeremy L Thompson CeedInt *d_ind; 44*0d0321e0SJeremy L Thompson CeedInt *d_ind_allocated; 45*0d0321e0SJeremy L Thompson CeedInt *d_toffsets; 46*0d0321e0SJeremy L Thompson CeedInt *d_tindices; 47*0d0321e0SJeremy L Thompson CeedInt *d_lvec_indices; 48*0d0321e0SJeremy L Thompson } CeedElemRestriction_Cuda; 49*0d0321e0SJeremy L Thompson 50*0d0321e0SJeremy L Thompson // We use a struct to avoid having to memCpy the array of pointers 51*0d0321e0SJeremy L Thompson // __global__ copies by value the struct. 52*0d0321e0SJeremy L Thompson typedef struct { 53*0d0321e0SJeremy L Thompson const CeedScalar *inputs[CEED_FIELD_MAX]; 54*0d0321e0SJeremy L Thompson CeedScalar *outputs[CEED_FIELD_MAX]; 55*0d0321e0SJeremy L Thompson } Fields_Cuda; 56*0d0321e0SJeremy L Thompson 57*0d0321e0SJeremy L Thompson typedef struct { 58*0d0321e0SJeremy L Thompson CUmodule module; 59*0d0321e0SJeremy L Thompson char *qFunctionName; 60*0d0321e0SJeremy L Thompson char *qFunctionSource; 61*0d0321e0SJeremy L Thompson CUfunction qFunction; 62*0d0321e0SJeremy L Thompson Fields_Cuda fields; 63*0d0321e0SJeremy L Thompson void *d_c; 64*0d0321e0SJeremy L Thompson } CeedQFunction_Cuda; 65*0d0321e0SJeremy L Thompson 66*0d0321e0SJeremy L Thompson typedef struct { 67*0d0321e0SJeremy L Thompson void *h_data; 68*0d0321e0SJeremy L Thompson void *h_data_borrowed; 69*0d0321e0SJeremy L Thompson void *h_data_owned; 70*0d0321e0SJeremy L Thompson void *d_data; 71*0d0321e0SJeremy L Thompson void *d_data_borrowed; 72*0d0321e0SJeremy L Thompson void *d_data_owned; 73*0d0321e0SJeremy L Thompson } CeedQFunctionContext_Cuda; 74*0d0321e0SJeremy L Thompson 75*0d0321e0SJeremy L Thompson typedef struct { 76*0d0321e0SJeremy L Thompson CUmodule module; 77*0d0321e0SJeremy L Thompson CUfunction interp; 78*0d0321e0SJeremy L Thompson CUfunction grad; 79*0d0321e0SJeremy L Thompson CUfunction weight; 80*0d0321e0SJeremy L Thompson CeedScalar *d_interp1d; 81*0d0321e0SJeremy L Thompson CeedScalar *d_grad1d; 82*0d0321e0SJeremy L Thompson CeedScalar *d_qweight1d; 83*0d0321e0SJeremy L Thompson } CeedBasis_Cuda; 84*0d0321e0SJeremy L Thompson 85*0d0321e0SJeremy L Thompson typedef struct { 86*0d0321e0SJeremy L Thompson CUmodule module; 87*0d0321e0SJeremy L Thompson CUfunction interp; 88*0d0321e0SJeremy L Thompson CUfunction grad; 89*0d0321e0SJeremy L Thompson CUfunction weight; 90*0d0321e0SJeremy L Thompson CeedScalar *d_interp; 91*0d0321e0SJeremy L Thompson CeedScalar *d_grad; 92*0d0321e0SJeremy L Thompson CeedScalar *d_qweight; 93*0d0321e0SJeremy L Thompson } CeedBasisNonTensor_Cuda; 94*0d0321e0SJeremy L Thompson 95*0d0321e0SJeremy L Thompson typedef struct { 96*0d0321e0SJeremy L Thompson CUmodule module; 97*0d0321e0SJeremy L Thompson CUfunction linearDiagonal; 98*0d0321e0SJeremy L Thompson CUfunction linearPointBlock; 99*0d0321e0SJeremy L Thompson CeedBasis basisin, basisout; 100*0d0321e0SJeremy L Thompson CeedElemRestriction diagrstr, pbdiagrstr; 101*0d0321e0SJeremy L Thompson CeedVector elemdiag, pbelemdiag; 102*0d0321e0SJeremy L Thompson CeedInt numemodein, numemodeout, nnodes; 103*0d0321e0SJeremy L Thompson CeedEvalMode *h_emodein, *h_emodeout; 104*0d0321e0SJeremy L Thompson CeedEvalMode *d_emodein, *d_emodeout; 105*0d0321e0SJeremy L Thompson CeedScalar *d_identity, *d_interpin, *d_interpout, *d_gradin, *d_gradout; 106*0d0321e0SJeremy L Thompson } CeedOperatorDiag_Cuda; 107*0d0321e0SJeremy L Thompson 108*0d0321e0SJeremy L Thompson typedef struct { 109*0d0321e0SJeremy L Thompson CeedVector *evecs; // E-vectors, inputs followed by outputs 110*0d0321e0SJeremy L Thompson CeedVector *qvecsin; // Input Q-vectors needed to apply operator 111*0d0321e0SJeremy L Thompson CeedVector *qvecsout; // Output Q-vectors needed to apply operator 112*0d0321e0SJeremy L Thompson CeedInt numein; 113*0d0321e0SJeremy L Thompson CeedInt numeout; 114*0d0321e0SJeremy L Thompson CeedInt qfnumactivein, qfnumactiveout; 115*0d0321e0SJeremy L Thompson CeedVector *qfactivein; 116*0d0321e0SJeremy L Thompson CeedOperatorDiag_Cuda *diag; 117*0d0321e0SJeremy L Thompson } CeedOperator_Cuda; 118*0d0321e0SJeremy L Thompson 119*0d0321e0SJeremy L Thompson CEED_INTERN int CeedCudaGetCublasHandle(Ceed ceed, cublasHandle_t *handle); 120*0d0321e0SJeremy L Thompson 121*0d0321e0SJeremy L Thompson CEED_INTERN int CeedVectorCreate_Cuda(CeedInt n, CeedVector vec); 122*0d0321e0SJeremy L Thompson 123*0d0321e0SJeremy L Thompson CEED_INTERN int CeedElemRestrictionCreate_Cuda(CeedMemType mtype, 124*0d0321e0SJeremy L Thompson CeedCopyMode cmode, const CeedInt *indices, CeedElemRestriction r); 125*0d0321e0SJeremy L Thompson 126*0d0321e0SJeremy L Thompson CEED_INTERN int CeedElemRestrictionCreateBlocked_Cuda(const CeedMemType mtype, 127*0d0321e0SJeremy L Thompson const CeedCopyMode cmode, const CeedInt *indices, 128*0d0321e0SJeremy L Thompson const CeedElemRestriction res); 129*0d0321e0SJeremy L Thompson 130*0d0321e0SJeremy L Thompson CEED_INTERN int CeedBasisApplyElems_Cuda(CeedBasis basis, const CeedInt nelem, 131*0d0321e0SJeremy L Thompson CeedTransposeMode tmode, CeedEvalMode emode, const CeedVector u, CeedVector v); 132*0d0321e0SJeremy L Thompson 133*0d0321e0SJeremy L Thompson CEED_INTERN int CeedQFunctionApplyElems_Cuda(CeedQFunction qf, const CeedInt Q, 134*0d0321e0SJeremy L Thompson const CeedVector *const u, const CeedVector *v); 135*0d0321e0SJeremy L Thompson 136*0d0321e0SJeremy L Thompson CEED_INTERN int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P1d, 137*0d0321e0SJeremy L Thompson CeedInt Q1d, 138*0d0321e0SJeremy L Thompson const CeedScalar *interp1d, 139*0d0321e0SJeremy L Thompson const CeedScalar *grad1d, 140*0d0321e0SJeremy L Thompson const CeedScalar *qref1d, 141*0d0321e0SJeremy L Thompson const CeedScalar *qweight1d, 142*0d0321e0SJeremy L Thompson CeedBasis basis); 143*0d0321e0SJeremy L Thompson 144*0d0321e0SJeremy L Thompson CEED_INTERN int CeedBasisCreateH1_Cuda(CeedElemTopology, CeedInt, CeedInt, 145*0d0321e0SJeremy L Thompson CeedInt, const CeedScalar *, 146*0d0321e0SJeremy L Thompson const CeedScalar *, const CeedScalar *, 147*0d0321e0SJeremy L Thompson const CeedScalar *, CeedBasis); 148*0d0321e0SJeremy L Thompson 149*0d0321e0SJeremy L Thompson CEED_INTERN int CeedQFunctionCreate_Cuda(CeedQFunction qf); 150*0d0321e0SJeremy L Thompson 151*0d0321e0SJeremy L Thompson CEED_INTERN int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx); 152*0d0321e0SJeremy L Thompson 153*0d0321e0SJeremy L Thompson CEED_INTERN int CeedOperatorCreate_Cuda(CeedOperator op); 154*0d0321e0SJeremy L Thompson 155*0d0321e0SJeremy L Thompson CEED_INTERN int CeedCompositeOperatorCreate_Cuda(CeedOperator op); 156*0d0321e0SJeremy L Thompson #endif 157