xref: /libCEED/backends/cuda-ref/ceed-cuda-ref-qfunction.c (revision 0d0321e0e600f17fbb9528732fcb5c1d5c63fc0f)
1*0d0321e0SJeremy L Thompson // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2*0d0321e0SJeremy L Thompson // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3*0d0321e0SJeremy L Thompson // All Rights reserved. See files LICENSE and NOTICE for details.
4*0d0321e0SJeremy L Thompson //
5*0d0321e0SJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software
6*0d0321e0SJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral
7*0d0321e0SJeremy L Thompson // element discretizations for exascale applications. For more information and
8*0d0321e0SJeremy L Thompson // source code availability see http://github.com/ceed.
9*0d0321e0SJeremy L Thompson //
10*0d0321e0SJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11*0d0321e0SJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office
12*0d0321e0SJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for
13*0d0321e0SJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including
14*0d0321e0SJeremy L Thompson // software, applications, hardware, advanced system engineering and early
15*0d0321e0SJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative.
16*0d0321e0SJeremy L Thompson 
17*0d0321e0SJeremy L Thompson #include <ceed/ceed.h>
18*0d0321e0SJeremy L Thompson #include <ceed/backend.h>
19*0d0321e0SJeremy L Thompson #include <cuda.h>
20*0d0321e0SJeremy L Thompson #include <stdio.h>
21*0d0321e0SJeremy L Thompson #include <string.h>
22*0d0321e0SJeremy L Thompson #include "ceed-cuda-ref.h"
23*0d0321e0SJeremy L Thompson #include "ceed-cuda-ref-qfunction-load.h"
24*0d0321e0SJeremy L Thompson #include "../cuda/ceed-cuda-compile.h"
25*0d0321e0SJeremy L Thompson 
26*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
27*0d0321e0SJeremy L Thompson // Apply QFunction
28*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
29*0d0321e0SJeremy L Thompson static int CeedQFunctionApply_Cuda(CeedQFunction qf, CeedInt Q,
30*0d0321e0SJeremy L Thompson                                    CeedVector *U, CeedVector *V) {
31*0d0321e0SJeremy L Thompson   int ierr;
32*0d0321e0SJeremy L Thompson   Ceed ceed;
33*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChkBackend(ierr);
34*0d0321e0SJeremy L Thompson 
35*0d0321e0SJeremy L Thompson   // Build and compile kernel, if not done
36*0d0321e0SJeremy L Thompson   ierr = CeedCudaBuildQFunction(qf); CeedChkBackend(ierr);
37*0d0321e0SJeremy L Thompson 
38*0d0321e0SJeremy L Thompson   CeedQFunction_Cuda *data;
39*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr);
40*0d0321e0SJeremy L Thompson   Ceed_Cuda *ceed_Cuda;
41*0d0321e0SJeremy L Thompson   ierr = CeedGetData(ceed, &ceed_Cuda); CeedChkBackend(ierr);
42*0d0321e0SJeremy L Thompson   CeedInt numinputfields, numoutputfields;
43*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
44*0d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
45*0d0321e0SJeremy L Thompson 
46*0d0321e0SJeremy L Thompson   // Read vectors
47*0d0321e0SJeremy L Thompson   for (CeedInt i = 0; i < numinputfields; i++) {
48*0d0321e0SJeremy L Thompson     ierr = CeedVectorGetArrayRead(U[i], CEED_MEM_DEVICE, &data->fields.inputs[i]);
49*0d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
50*0d0321e0SJeremy L Thompson   }
51*0d0321e0SJeremy L Thompson   for (CeedInt i = 0; i < numoutputfields; i++) {
52*0d0321e0SJeremy L Thompson     ierr = CeedVectorGetArrayWrite(V[i], CEED_MEM_DEVICE, &data->fields.outputs[i]);
53*0d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
54*0d0321e0SJeremy L Thompson   }
55*0d0321e0SJeremy L Thompson 
56*0d0321e0SJeremy L Thompson   // Get context data
57*0d0321e0SJeremy L Thompson   CeedQFunctionContext ctx;
58*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetInnerContext(qf, &ctx); CeedChkBackend(ierr);
59*0d0321e0SJeremy L Thompson   if (ctx) {
60*0d0321e0SJeremy L Thompson     ierr = CeedQFunctionContextGetData(ctx, CEED_MEM_DEVICE, &data->d_c);
61*0d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
62*0d0321e0SJeremy L Thompson   }
63*0d0321e0SJeremy L Thompson 
64*0d0321e0SJeremy L Thompson   // Run kernel
65*0d0321e0SJeremy L Thompson   void *args[] = {&data->d_c, (void *) &Q, &data->fields};
66*0d0321e0SJeremy L Thompson   ierr = CeedRunKernelAutoblockCuda(ceed, data->qFunction, Q, args);
67*0d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
68*0d0321e0SJeremy L Thompson 
69*0d0321e0SJeremy L Thompson   // Restore vectors
70*0d0321e0SJeremy L Thompson   for (CeedInt i = 0; i < numinputfields; i++) {
71*0d0321e0SJeremy L Thompson     ierr = CeedVectorRestoreArrayRead(U[i], &data->fields.inputs[i]);
72*0d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
73*0d0321e0SJeremy L Thompson   }
74*0d0321e0SJeremy L Thompson   for (CeedInt i = 0; i < numoutputfields; i++) {
75*0d0321e0SJeremy L Thompson     ierr = CeedVectorRestoreArray(V[i], &data->fields.outputs[i]);
76*0d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
77*0d0321e0SJeremy L Thompson   }
78*0d0321e0SJeremy L Thompson 
79*0d0321e0SJeremy L Thompson   // Restore context
80*0d0321e0SJeremy L Thompson   if (ctx) {
81*0d0321e0SJeremy L Thompson     ierr = CeedQFunctionContextRestoreData(ctx, &data->d_c);
82*0d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
83*0d0321e0SJeremy L Thompson   }
84*0d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
85*0d0321e0SJeremy L Thompson }
86*0d0321e0SJeremy L Thompson 
87*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
88*0d0321e0SJeremy L Thompson // Destroy QFunction
89*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
90*0d0321e0SJeremy L Thompson static int CeedQFunctionDestroy_Cuda(CeedQFunction qf) {
91*0d0321e0SJeremy L Thompson   int ierr;
92*0d0321e0SJeremy L Thompson   CeedQFunction_Cuda *data;
93*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr);
94*0d0321e0SJeremy L Thompson   Ceed ceed;
95*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChkBackend(ierr);
96*0d0321e0SJeremy L Thompson   if  (data->module)
97*0d0321e0SJeremy L Thompson     CeedChk_Cu(ceed, cuModuleUnload(data->module));
98*0d0321e0SJeremy L Thompson   ierr = CeedFree(&data); CeedChkBackend(ierr);
99*0d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
100*0d0321e0SJeremy L Thompson }
101*0d0321e0SJeremy L Thompson 
102*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
103*0d0321e0SJeremy L Thompson // Set User QFunction
104*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
105*0d0321e0SJeremy L Thompson static int CeedQFunctionSetCUDAUserFunction_Cuda(CeedQFunction qf,
106*0d0321e0SJeremy L Thompson     CUfunction f) {
107*0d0321e0SJeremy L Thompson   int ierr;
108*0d0321e0SJeremy L Thompson   CeedQFunction_Cuda *data;
109*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr);
110*0d0321e0SJeremy L Thompson   data->qFunction = f;
111*0d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
112*0d0321e0SJeremy L Thompson }
113*0d0321e0SJeremy L Thompson 
114*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
115*0d0321e0SJeremy L Thompson // Create QFunction
116*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
117*0d0321e0SJeremy L Thompson int CeedQFunctionCreate_Cuda(CeedQFunction qf) {
118*0d0321e0SJeremy L Thompson   int ierr;
119*0d0321e0SJeremy L Thompson   Ceed ceed;
120*0d0321e0SJeremy L Thompson   CeedQFunctionGetCeed(qf, &ceed);
121*0d0321e0SJeremy L Thompson   CeedQFunction_Cuda *data;
122*0d0321e0SJeremy L Thompson   ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
123*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionSetData(qf, data); CeedChkBackend(ierr);
124*0d0321e0SJeremy L Thompson 
125*0d0321e0SJeremy L Thompson   // Read QFunction source
126*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetKernelName(qf, &data->qFunctionName);
127*0d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
128*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionLoadSourceToBuffer(qf, &data->qFunctionSource);
129*0d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
130*0d0321e0SJeremy L Thompson 
131*0d0321e0SJeremy L Thompson   // Register backend functions
132*0d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Apply",
133*0d0321e0SJeremy L Thompson                                 CeedQFunctionApply_Cuda); CeedChkBackend(ierr);
134*0d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy",
135*0d0321e0SJeremy L Thompson                                 CeedQFunctionDestroy_Cuda); CeedChkBackend(ierr);
136*0d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "SetCUDAUserFunction",
137*0d0321e0SJeremy L Thompson                                 CeedQFunctionSetCUDAUserFunction_Cuda);
138*0d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
139*0d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
140*0d0321e0SJeremy L Thompson }
141*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
142