1*0d0321e0SJeremy L Thompson // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2*0d0321e0SJeremy L Thompson // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3*0d0321e0SJeremy L Thompson // All Rights reserved. See files LICENSE and NOTICE for details. 4*0d0321e0SJeremy L Thompson // 5*0d0321e0SJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software 6*0d0321e0SJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral 7*0d0321e0SJeremy L Thompson // element discretizations for exascale applications. For more information and 8*0d0321e0SJeremy L Thompson // source code availability see http://github.com/ceed. 9*0d0321e0SJeremy L Thompson // 10*0d0321e0SJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11*0d0321e0SJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office 12*0d0321e0SJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for 13*0d0321e0SJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including 14*0d0321e0SJeremy L Thompson // software, applications, hardware, advanced system engineering and early 15*0d0321e0SJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative. 16*0d0321e0SJeremy L Thompson 17*0d0321e0SJeremy L Thompson #include <ceed/ceed.h> 18*0d0321e0SJeremy L Thompson #include <ceed/backend.h> 19*0d0321e0SJeremy L Thompson #include <cuda.h> 20*0d0321e0SJeremy L Thompson #include <stdio.h> 21*0d0321e0SJeremy L Thompson #include <string.h> 22*0d0321e0SJeremy L Thompson #include "ceed-cuda-ref.h" 23*0d0321e0SJeremy L Thompson #include "ceed-cuda-ref-qfunction-load.h" 24*0d0321e0SJeremy L Thompson #include "../cuda/ceed-cuda-compile.h" 25*0d0321e0SJeremy L Thompson 26*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 27*0d0321e0SJeremy L Thompson // Apply QFunction 28*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 29*0d0321e0SJeremy L Thompson static int CeedQFunctionApply_Cuda(CeedQFunction qf, CeedInt Q, 30*0d0321e0SJeremy L Thompson CeedVector *U, CeedVector *V) { 31*0d0321e0SJeremy L Thompson int ierr; 32*0d0321e0SJeremy L Thompson Ceed ceed; 33*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChkBackend(ierr); 34*0d0321e0SJeremy L Thompson 35*0d0321e0SJeremy L Thompson // Build and compile kernel, if not done 36*0d0321e0SJeremy L Thompson ierr = CeedCudaBuildQFunction(qf); CeedChkBackend(ierr); 37*0d0321e0SJeremy L Thompson 38*0d0321e0SJeremy L Thompson CeedQFunction_Cuda *data; 39*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr); 40*0d0321e0SJeremy L Thompson Ceed_Cuda *ceed_Cuda; 41*0d0321e0SJeremy L Thompson ierr = CeedGetData(ceed, &ceed_Cuda); CeedChkBackend(ierr); 42*0d0321e0SJeremy L Thompson CeedInt numinputfields, numoutputfields; 43*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 44*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 45*0d0321e0SJeremy L Thompson 46*0d0321e0SJeremy L Thompson // Read vectors 47*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numinputfields; i++) { 48*0d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayRead(U[i], CEED_MEM_DEVICE, &data->fields.inputs[i]); 49*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 50*0d0321e0SJeremy L Thompson } 51*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numoutputfields; i++) { 52*0d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayWrite(V[i], CEED_MEM_DEVICE, &data->fields.outputs[i]); 53*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 54*0d0321e0SJeremy L Thompson } 55*0d0321e0SJeremy L Thompson 56*0d0321e0SJeremy L Thompson // Get context data 57*0d0321e0SJeremy L Thompson CeedQFunctionContext ctx; 58*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetInnerContext(qf, &ctx); CeedChkBackend(ierr); 59*0d0321e0SJeremy L Thompson if (ctx) { 60*0d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetData(ctx, CEED_MEM_DEVICE, &data->d_c); 61*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 62*0d0321e0SJeremy L Thompson } 63*0d0321e0SJeremy L Thompson 64*0d0321e0SJeremy L Thompson // Run kernel 65*0d0321e0SJeremy L Thompson void *args[] = {&data->d_c, (void *) &Q, &data->fields}; 66*0d0321e0SJeremy L Thompson ierr = CeedRunKernelAutoblockCuda(ceed, data->qFunction, Q, args); 67*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 68*0d0321e0SJeremy L Thompson 69*0d0321e0SJeremy L Thompson // Restore vectors 70*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numinputfields; i++) { 71*0d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(U[i], &data->fields.inputs[i]); 72*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 73*0d0321e0SJeremy L Thompson } 74*0d0321e0SJeremy L Thompson for (CeedInt i = 0; i < numoutputfields; i++) { 75*0d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(V[i], &data->fields.outputs[i]); 76*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 77*0d0321e0SJeremy L Thompson } 78*0d0321e0SJeremy L Thompson 79*0d0321e0SJeremy L Thompson // Restore context 80*0d0321e0SJeremy L Thompson if (ctx) { 81*0d0321e0SJeremy L Thompson ierr = CeedQFunctionContextRestoreData(ctx, &data->d_c); 82*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 83*0d0321e0SJeremy L Thompson } 84*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 85*0d0321e0SJeremy L Thompson } 86*0d0321e0SJeremy L Thompson 87*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 88*0d0321e0SJeremy L Thompson // Destroy QFunction 89*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 90*0d0321e0SJeremy L Thompson static int CeedQFunctionDestroy_Cuda(CeedQFunction qf) { 91*0d0321e0SJeremy L Thompson int ierr; 92*0d0321e0SJeremy L Thompson CeedQFunction_Cuda *data; 93*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr); 94*0d0321e0SJeremy L Thompson Ceed ceed; 95*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChkBackend(ierr); 96*0d0321e0SJeremy L Thompson if (data->module) 97*0d0321e0SJeremy L Thompson CeedChk_Cu(ceed, cuModuleUnload(data->module)); 98*0d0321e0SJeremy L Thompson ierr = CeedFree(&data); CeedChkBackend(ierr); 99*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 100*0d0321e0SJeremy L Thompson } 101*0d0321e0SJeremy L Thompson 102*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 103*0d0321e0SJeremy L Thompson // Set User QFunction 104*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 105*0d0321e0SJeremy L Thompson static int CeedQFunctionSetCUDAUserFunction_Cuda(CeedQFunction qf, 106*0d0321e0SJeremy L Thompson CUfunction f) { 107*0d0321e0SJeremy L Thompson int ierr; 108*0d0321e0SJeremy L Thompson CeedQFunction_Cuda *data; 109*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr); 110*0d0321e0SJeremy L Thompson data->qFunction = f; 111*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 112*0d0321e0SJeremy L Thompson } 113*0d0321e0SJeremy L Thompson 114*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 115*0d0321e0SJeremy L Thompson // Create QFunction 116*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 117*0d0321e0SJeremy L Thompson int CeedQFunctionCreate_Cuda(CeedQFunction qf) { 118*0d0321e0SJeremy L Thompson int ierr; 119*0d0321e0SJeremy L Thompson Ceed ceed; 120*0d0321e0SJeremy L Thompson CeedQFunctionGetCeed(qf, &ceed); 121*0d0321e0SJeremy L Thompson CeedQFunction_Cuda *data; 122*0d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 123*0d0321e0SJeremy L Thompson ierr = CeedQFunctionSetData(qf, data); CeedChkBackend(ierr); 124*0d0321e0SJeremy L Thompson 125*0d0321e0SJeremy L Thompson // Read QFunction source 126*0d0321e0SJeremy L Thompson ierr = CeedQFunctionGetKernelName(qf, &data->qFunctionName); 127*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 128*0d0321e0SJeremy L Thompson ierr = CeedQFunctionLoadSourceToBuffer(qf, &data->qFunctionSource); 129*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 130*0d0321e0SJeremy L Thompson 131*0d0321e0SJeremy L Thompson // Register backend functions 132*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Apply", 133*0d0321e0SJeremy L Thompson CeedQFunctionApply_Cuda); CeedChkBackend(ierr); 134*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy", 135*0d0321e0SJeremy L Thompson CeedQFunctionDestroy_Cuda); CeedChkBackend(ierr); 136*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "SetCUDAUserFunction", 137*0d0321e0SJeremy L Thompson CeedQFunctionSetCUDAUserFunction_Cuda); 138*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 139*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 140*0d0321e0SJeremy L Thompson } 141*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 142