xref: /libCEED/rust/libceed-sys/c-src/backends/cuda-ref/ceed-cuda-ref-qfunction.c (revision 46dc07349c44057b9efae59cd6a2d41f419237bd)
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
160d0321e0SJeremy L Thompson 
170d0321e0SJeremy L Thompson #include <ceed/ceed.h>
180d0321e0SJeremy L Thompson #include <ceed/backend.h>
190d0321e0SJeremy L Thompson #include <cuda.h>
200d0321e0SJeremy L Thompson #include <stdio.h>
210d0321e0SJeremy L Thompson #include <string.h>
220d0321e0SJeremy L Thompson #include "ceed-cuda-ref.h"
230d0321e0SJeremy L Thompson #include "ceed-cuda-ref-qfunction-load.h"
240d0321e0SJeremy L Thompson #include "../cuda/ceed-cuda-compile.h"
250d0321e0SJeremy L Thompson 
//------------------------------------------------------------------------------
// Apply QFunction
//------------------------------------------------------------------------------
290d0321e0SJeremy L Thompson static int CeedQFunctionApply_Cuda(CeedQFunction qf, CeedInt Q,
300d0321e0SJeremy L Thompson                                    CeedVector *U, CeedVector *V) {
310d0321e0SJeremy L Thompson   int ierr;
320d0321e0SJeremy L Thompson   Ceed ceed;
330d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChkBackend(ierr);
340d0321e0SJeremy L Thompson 
350d0321e0SJeremy L Thompson   // Build and compile kernel, if not done
360d0321e0SJeremy L Thompson   ierr = CeedCudaBuildQFunction(qf); CeedChkBackend(ierr);
370d0321e0SJeremy L Thompson 
380d0321e0SJeremy L Thompson   CeedQFunction_Cuda *data;
390d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr);
400d0321e0SJeremy L Thompson   Ceed_Cuda *ceed_Cuda;
410d0321e0SJeremy L Thompson   ierr = CeedGetData(ceed, &ceed_Cuda); CeedChkBackend(ierr);
42437930d1SJeremy L Thompson   CeedInt num_input_fields, num_output_fields;
43437930d1SJeremy L Thompson   ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
440d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
450d0321e0SJeremy L Thompson 
460d0321e0SJeremy L Thompson   // Read vectors
47437930d1SJeremy L Thompson   for (CeedInt i = 0; i < num_input_fields; i++) {
480d0321e0SJeremy L Thompson     ierr = CeedVectorGetArrayRead(U[i], CEED_MEM_DEVICE, &data->fields.inputs[i]);
490d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
500d0321e0SJeremy L Thompson   }
51437930d1SJeremy L Thompson   for (CeedInt i = 0; i < num_output_fields; i++) {
520d0321e0SJeremy L Thompson     ierr = CeedVectorGetArrayWrite(V[i], CEED_MEM_DEVICE, &data->fields.outputs[i]);
530d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
540d0321e0SJeremy L Thompson   }
550d0321e0SJeremy L Thompson 
560d0321e0SJeremy L Thompson   // Get context data
570d0321e0SJeremy L Thompson   CeedQFunctionContext ctx;
580d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetInnerContext(qf, &ctx); CeedChkBackend(ierr);
590d0321e0SJeremy L Thompson   if (ctx) {
600d0321e0SJeremy L Thompson     ierr = CeedQFunctionContextGetData(ctx, CEED_MEM_DEVICE, &data->d_c);
610d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
620d0321e0SJeremy L Thompson   }
630d0321e0SJeremy L Thompson 
640d0321e0SJeremy L Thompson   // Run kernel
650d0321e0SJeremy L Thompson   void *args[] = {&data->d_c, (void *) &Q, &data->fields};
66437930d1SJeremy L Thompson   ierr = CeedRunKernelAutoblockCuda(ceed, data->QFunction, Q, args);
670d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
680d0321e0SJeremy L Thompson 
690d0321e0SJeremy L Thompson   // Restore vectors
70437930d1SJeremy L Thompson   for (CeedInt i = 0; i < num_input_fields; i++) {
710d0321e0SJeremy L Thompson     ierr = CeedVectorRestoreArrayRead(U[i], &data->fields.inputs[i]);
720d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
730d0321e0SJeremy L Thompson   }
74437930d1SJeremy L Thompson   for (CeedInt i = 0; i < num_output_fields; i++) {
750d0321e0SJeremy L Thompson     ierr = CeedVectorRestoreArray(V[i], &data->fields.outputs[i]);
760d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
770d0321e0SJeremy L Thompson   }
780d0321e0SJeremy L Thompson 
790d0321e0SJeremy L Thompson   // Restore context
800d0321e0SJeremy L Thompson   if (ctx) {
810d0321e0SJeremy L Thompson     ierr = CeedQFunctionContextRestoreData(ctx, &data->d_c);
820d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
830d0321e0SJeremy L Thompson   }
840d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
850d0321e0SJeremy L Thompson }
860d0321e0SJeremy L Thompson 
//------------------------------------------------------------------------------
// Destroy QFunction
//------------------------------------------------------------------------------
900d0321e0SJeremy L Thompson static int CeedQFunctionDestroy_Cuda(CeedQFunction qf) {
910d0321e0SJeremy L Thompson   int ierr;
920d0321e0SJeremy L Thompson   CeedQFunction_Cuda *data;
930d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr);
940d0321e0SJeremy L Thompson   Ceed ceed;
950d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChkBackend(ierr);
960d0321e0SJeremy L Thompson   if (data->module)
970d0321e0SJeremy L Thompson     CeedChk_Cu(ceed, cuModuleUnload(data->module));
980d0321e0SJeremy L Thompson   ierr = CeedFree(&data); CeedChkBackend(ierr);
99437930d1SJeremy L Thompson 
1000d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1010d0321e0SJeremy L Thompson }
1020d0321e0SJeremy L Thompson 
//------------------------------------------------------------------------------
// Set User QFunction
//------------------------------------------------------------------------------
1060d0321e0SJeremy L Thompson static int CeedQFunctionSetCUDAUserFunction_Cuda(CeedQFunction qf,
1070d0321e0SJeremy L Thompson     CUfunction f) {
1080d0321e0SJeremy L Thompson   int ierr;
1090d0321e0SJeremy L Thompson   CeedQFunction_Cuda *data;
1100d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr);
111437930d1SJeremy L Thompson   data->QFunction = f;
112437930d1SJeremy L Thompson 
1130d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1140d0321e0SJeremy L Thompson }
1150d0321e0SJeremy L Thompson 
//------------------------------------------------------------------------------
// Create QFunction
//------------------------------------------------------------------------------
1190d0321e0SJeremy L Thompson int CeedQFunctionCreate_Cuda(CeedQFunction qf) {
1200d0321e0SJeremy L Thompson   int ierr;
1210d0321e0SJeremy L Thompson   Ceed ceed;
1220d0321e0SJeremy L Thompson   CeedQFunctionGetCeed(qf, &ceed);
1230d0321e0SJeremy L Thompson   CeedQFunction_Cuda *data;
1240d0321e0SJeremy L Thompson   ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
1250d0321e0SJeremy L Thompson   ierr = CeedQFunctionSetData(qf, data); CeedChkBackend(ierr);
1260d0321e0SJeremy L Thompson 
1270d0321e0SJeremy L Thompson   // Read QFunction source
128437930d1SJeremy L Thompson   ierr = CeedQFunctionGetKernelName(qf, &data->qfunction_name);
1290d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
130*46dc0734SJeremy L Thompson   CeedDebug256(ceed, 2, "----- Loading QFunction User Source -----\n");
131437930d1SJeremy L Thompson   ierr = CeedQFunctionLoadSourceToBuffer(qf, &data->qfunction_source);
1320d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
133*46dc0734SJeremy L Thompson   CeedDebug256(ceed, 2, "----- Loading QFunction User Source Complete! -----\n");
1340d0321e0SJeremy L Thompson 
1350d0321e0SJeremy L Thompson   // Register backend functions
1360d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Apply",
1370d0321e0SJeremy L Thompson                                 CeedQFunctionApply_Cuda); CeedChkBackend(ierr);
1380d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy",
1390d0321e0SJeremy L Thompson                                 CeedQFunctionDestroy_Cuda); CeedChkBackend(ierr);
1400d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "SetCUDAUserFunction",
1410d0321e0SJeremy L Thompson                                 CeedQFunctionSetCUDAUserFunction_Cuda);
1420d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
1430d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1440d0321e0SJeremy L Thompson }
1450d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
146