// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
160d0321e0SJeremy L Thompson 170d0321e0SJeremy L Thompson #include <ceed/ceed.h> 180d0321e0SJeremy L Thompson #include <ceed/backend.h> 190d0321e0SJeremy L Thompson #include <cuda.h> 200d0321e0SJeremy L Thompson #include <stdio.h> 210d0321e0SJeremy L Thompson #include <string.h> 220d0321e0SJeremy L Thompson #include "ceed-cuda-ref.h" 230d0321e0SJeremy L Thompson #include "ceed-cuda-ref-qfunction-load.h" 240d0321e0SJeremy L Thompson #include "../cuda/ceed-cuda-compile.h" 250d0321e0SJeremy L Thompson 260d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 270d0321e0SJeremy L Thompson // Apply QFunction 280d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 290d0321e0SJeremy L Thompson static int CeedQFunctionApply_Cuda(CeedQFunction qf, CeedInt Q, 300d0321e0SJeremy L Thompson CeedVector *U, CeedVector *V) { 310d0321e0SJeremy L Thompson int ierr; 320d0321e0SJeremy L Thompson Ceed ceed; 330d0321e0SJeremy L Thompson ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChkBackend(ierr); 340d0321e0SJeremy L Thompson 350d0321e0SJeremy L Thompson // Build and compile kernel, if not done 360d0321e0SJeremy L Thompson ierr = CeedCudaBuildQFunction(qf); CeedChkBackend(ierr); 370d0321e0SJeremy L Thompson 380d0321e0SJeremy L Thompson CeedQFunction_Cuda *data; 390d0321e0SJeremy L Thompson ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr); 400d0321e0SJeremy L Thompson Ceed_Cuda *ceed_Cuda; 410d0321e0SJeremy L Thompson ierr = CeedGetData(ceed, &ceed_Cuda); CeedChkBackend(ierr); 42*437930d1SJeremy L Thompson CeedInt num_input_fields, num_output_fields; 43*437930d1SJeremy L Thompson ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 440d0321e0SJeremy L Thompson CeedChkBackend(ierr); 450d0321e0SJeremy L Thompson 460d0321e0SJeremy L Thompson // Read vectors 47*437930d1SJeremy L Thompson for (CeedInt i = 0; i < num_input_fields; i++) { 480d0321e0SJeremy L 
Thompson ierr = CeedVectorGetArrayRead(U[i], CEED_MEM_DEVICE, &data->fields.inputs[i]); 490d0321e0SJeremy L Thompson CeedChkBackend(ierr); 500d0321e0SJeremy L Thompson } 51*437930d1SJeremy L Thompson for (CeedInt i = 0; i < num_output_fields; i++) { 520d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayWrite(V[i], CEED_MEM_DEVICE, &data->fields.outputs[i]); 530d0321e0SJeremy L Thompson CeedChkBackend(ierr); 540d0321e0SJeremy L Thompson } 550d0321e0SJeremy L Thompson 560d0321e0SJeremy L Thompson // Get context data 570d0321e0SJeremy L Thompson CeedQFunctionContext ctx; 580d0321e0SJeremy L Thompson ierr = CeedQFunctionGetInnerContext(qf, &ctx); CeedChkBackend(ierr); 590d0321e0SJeremy L Thompson if (ctx) { 600d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetData(ctx, CEED_MEM_DEVICE, &data->d_c); 610d0321e0SJeremy L Thompson CeedChkBackend(ierr); 620d0321e0SJeremy L Thompson } 630d0321e0SJeremy L Thompson 640d0321e0SJeremy L Thompson // Run kernel 650d0321e0SJeremy L Thompson void *args[] = {&data->d_c, (void *) &Q, &data->fields}; 66*437930d1SJeremy L Thompson ierr = CeedRunKernelAutoblockCuda(ceed, data->QFunction, Q, args); 670d0321e0SJeremy L Thompson CeedChkBackend(ierr); 680d0321e0SJeremy L Thompson 690d0321e0SJeremy L Thompson // Restore vectors 70*437930d1SJeremy L Thompson for (CeedInt i = 0; i < num_input_fields; i++) { 710d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(U[i], &data->fields.inputs[i]); 720d0321e0SJeremy L Thompson CeedChkBackend(ierr); 730d0321e0SJeremy L Thompson } 74*437930d1SJeremy L Thompson for (CeedInt i = 0; i < num_output_fields; i++) { 750d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(V[i], &data->fields.outputs[i]); 760d0321e0SJeremy L Thompson CeedChkBackend(ierr); 770d0321e0SJeremy L Thompson } 780d0321e0SJeremy L Thompson 790d0321e0SJeremy L Thompson // Restore context 800d0321e0SJeremy L Thompson if (ctx) { 810d0321e0SJeremy L Thompson ierr = CeedQFunctionContextRestoreData(ctx, &data->d_c); 
820d0321e0SJeremy L Thompson CeedChkBackend(ierr); 830d0321e0SJeremy L Thompson } 840d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 850d0321e0SJeremy L Thompson } 860d0321e0SJeremy L Thompson 870d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 880d0321e0SJeremy L Thompson // Destroy QFunction 890d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 900d0321e0SJeremy L Thompson static int CeedQFunctionDestroy_Cuda(CeedQFunction qf) { 910d0321e0SJeremy L Thompson int ierr; 920d0321e0SJeremy L Thompson CeedQFunction_Cuda *data; 930d0321e0SJeremy L Thompson ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr); 940d0321e0SJeremy L Thompson Ceed ceed; 950d0321e0SJeremy L Thompson ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChkBackend(ierr); 960d0321e0SJeremy L Thompson if (data->module) 970d0321e0SJeremy L Thompson CeedChk_Cu(ceed, cuModuleUnload(data->module)); 980d0321e0SJeremy L Thompson ierr = CeedFree(&data); CeedChkBackend(ierr); 99*437930d1SJeremy L Thompson 1000d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1010d0321e0SJeremy L Thompson } 1020d0321e0SJeremy L Thompson 1030d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1040d0321e0SJeremy L Thompson // Set User QFunction 1050d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1060d0321e0SJeremy L Thompson static int CeedQFunctionSetCUDAUserFunction_Cuda(CeedQFunction qf, 1070d0321e0SJeremy L Thompson CUfunction f) { 1080d0321e0SJeremy L Thompson int ierr; 1090d0321e0SJeremy L Thompson CeedQFunction_Cuda *data; 1100d0321e0SJeremy L Thompson ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr); 111*437930d1SJeremy L Thompson data->QFunction = f; 112*437930d1SJeremy L Thompson 1130d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1140d0321e0SJeremy L Thompson } 
1150d0321e0SJeremy L Thompson 1160d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1170d0321e0SJeremy L Thompson // Create QFunction 1180d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1190d0321e0SJeremy L Thompson int CeedQFunctionCreate_Cuda(CeedQFunction qf) { 1200d0321e0SJeremy L Thompson int ierr; 1210d0321e0SJeremy L Thompson Ceed ceed; 1220d0321e0SJeremy L Thompson CeedQFunctionGetCeed(qf, &ceed); 1230d0321e0SJeremy L Thompson CeedQFunction_Cuda *data; 1240d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 1250d0321e0SJeremy L Thompson ierr = CeedQFunctionSetData(qf, data); CeedChkBackend(ierr); 1260d0321e0SJeremy L Thompson 1270d0321e0SJeremy L Thompson // Read QFunction source 128*437930d1SJeremy L Thompson ierr = CeedQFunctionGetKernelName(qf, &data->qfunction_name); 1290d0321e0SJeremy L Thompson CeedChkBackend(ierr); 130*437930d1SJeremy L Thompson ierr = CeedQFunctionLoadSourceToBuffer(qf, &data->qfunction_source); 1310d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1320d0321e0SJeremy L Thompson 1330d0321e0SJeremy L Thompson // Register backend functions 1340d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Apply", 1350d0321e0SJeremy L Thompson CeedQFunctionApply_Cuda); CeedChkBackend(ierr); 1360d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy", 1370d0321e0SJeremy L Thompson CeedQFunctionDestroy_Cuda); CeedChkBackend(ierr); 1380d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "SetCUDAUserFunction", 1390d0321e0SJeremy L Thompson CeedQFunctionSetCUDAUserFunction_Cuda); 1400d0321e0SJeremy L Thompson CeedChkBackend(ierr); 1410d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1420d0321e0SJeremy L Thompson } 1430d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 144