// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16241a4b83SYohann 17*ec3da8bcSJed Brown #include <ceed/ceed.h> 18*ec3da8bcSJed Brown #include <ceed/backend.h> 193d576824SJeremy L Thompson #include <cuda_runtime.h> 20241a4b83SYohann #include <stdio.h> 213d576824SJeremy L Thompson #include <string.h> 22241a4b83SYohann #include "ceed-cuda-gen.h" 233d576824SJeremy L Thompson #include "../cuda/ceed-cuda.h" 24241a4b83SYohann 25ab213215SJeremy L Thompson //------------------------------------------------------------------------------ 26ab213215SJeremy L Thompson // Apply QFunction 27ab213215SJeremy L Thompson //------------------------------------------------------------------------------ 28241a4b83SYohann static int CeedQFunctionApply_Cuda_gen(CeedQFunction qf, CeedInt Q, 29241a4b83SYohann CeedVector *U, CeedVector *V) { 30241a4b83SYohann int ierr; 31241a4b83SYohann Ceed ceed; 32e15f9bd0SJeremy L Thompson ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChkBackend(ierr); 33e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 34e15f9bd0SJeremy L Thompson "Backend does not implement QFunctionApply"); 35241a4b83SYohann } 36241a4b83SYohann 37ab213215SJeremy L Thompson //------------------------------------------------------------------------------ 38ab213215SJeremy L Thompson // Destroy QFunction 39ab213215SJeremy L Thompson //------------------------------------------------------------------------------ 40241a4b83SYohann static int CeedQFunctionDestroy_Cuda_gen(CeedQFunction qf) { 41241a4b83SYohann int ierr; 42241a4b83SYohann CeedQFunction_Cuda_gen *data; 43e15f9bd0SJeremy L Thompson ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr); 44241a4b83SYohann Ceed ceed; 45e15f9bd0SJeremy L Thompson ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChkBackend(ierr); 46241a4b83SYohann ierr = cudaFree(data->d_c); CeedChk_Cu(ceed, ierr); 47e15f9bd0SJeremy L Thompson ierr = CeedFree(&data->qFunctionSource); CeedChkBackend(ierr); 48e15f9bd0SJeremy L Thompson ierr = CeedFree(&data); CeedChkBackend(ierr); 
49e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 50241a4b83SYohann } 51241a4b83SYohann 52ab213215SJeremy L Thompson //------------------------------------------------------------------------------ 53ab213215SJeremy L Thompson // Load QFunction 54ab213215SJeremy L Thompson //------------------------------------------------------------------------------ 55241a4b83SYohann static int loadCudaFunction(CeedQFunction qf, char *c_src_file) { 56241a4b83SYohann int ierr; 57241a4b83SYohann Ceed ceed; 58241a4b83SYohann CeedQFunctionGetCeed(qf, &ceed); 59465fc175SJeremy L Thompson CeedQFunction_Cuda_gen *data; 60e15f9bd0SJeremy L Thompson ierr = CeedQFunctionGetData(qf, &data); CeedChkBackend(ierr); 61ab213215SJeremy L Thompson 62ab213215SJeremy L Thompson // Find source file 63241a4b83SYohann char *cuda_file; 64e15f9bd0SJeremy L Thompson ierr = CeedCalloc(CUDA_MAX_PATH, &cuda_file); CeedChkBackend(ierr); 65241a4b83SYohann memcpy(cuda_file, c_src_file, strlen(c_src_file)); 66241a4b83SYohann const char *last_dot = strrchr(cuda_file, '.'); 67241a4b83SYohann if (!last_dot) 68e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Cannot find file's extension!"); 69241a4b83SYohann const size_t cuda_path_len = last_dot - cuda_file; 70465fc175SJeremy L Thompson strncpy(&cuda_file[cuda_path_len], ".h", 3); 71ab213215SJeremy L Thompson 72ab213215SJeremy L Thompson // Open source file 73241a4b83SYohann FILE *fp; 74241a4b83SYohann long lSize; 75241a4b83SYohann char *buffer; 76241a4b83SYohann fp = fopen (cuda_file, "rb"); 77ab213215SJeremy L Thompson if (!fp) 78e9f4dca0SJeremy L Thompson // LCOV_EXCL_START 79e15f9bd0SJeremy L Thompson CeedError(ceed, CEED_ERROR_BACKEND, 80e15f9bd0SJeremy L Thompson "Couldn't open the Cuda file for the QFunction."); 81e9f4dca0SJeremy L Thompson // LCOV_EXCL_STOP 82241a4b83SYohann 83ab213215SJeremy L Thompson // Compute size of source file 84241a4b83SYohann fseek(fp, 0L, SEEK_END); 85241a4b83SYohann lSize = ftell(fp); 86241a4b83SYohann 
rewind(fp); 87241a4b83SYohann 88ab213215SJeremy L Thompson // Allocate memory for entire content 89e15f9bd0SJeremy L Thompson ierr = CeedCalloc(lSize+1, &buffer); CeedChkBackend(ierr); 90241a4b83SYohann 91ab213215SJeremy L Thompson // Copy the file into the buffer 92241a4b83SYohann if (1 != fread(buffer, lSize, 1, fp)) { 936bbcfef4SJeremy L Thompson // LCOV_EXCL_START 94241a4b83SYohann fclose(fp); 95e15f9bd0SJeremy L Thompson ierr = CeedFree(&buffer); CeedChkBackend(ierr); 96e15f9bd0SJeremy L Thompson CeedError(ceed, CEED_ERROR_BACKEND, 97e15f9bd0SJeremy L Thompson "Couldn't read the Cuda file for the QFunction."); 98e9f4dca0SJeremy L Thompson // LCOV_EXCL_STOP 99241a4b83SYohann } 100241a4b83SYohann 101ab213215SJeremy L Thompson // Append typedef and save source string 102241a4b83SYohann // FIXME: the magic number 16 should be defined somewhere... 103241a4b83SYohann char *fields_string = 104241a4b83SYohann "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Cuda_gen;"; 105465fc175SJeremy L Thompson ierr = CeedMalloc(1 + strlen(fields_string) + strlen(buffer), 106e15f9bd0SJeremy L Thompson &data->qFunctionSource); CeedChkBackend(ierr); 1079ff7e165SJed Brown memcpy(data->qFunctionSource, fields_string, strlen(fields_string)); 1089ff7e165SJed Brown memcpy(data->qFunctionSource + strlen(fields_string), buffer, 1099ff7e165SJed Brown strlen(buffer) + 1); 110241a4b83SYohann 111465fc175SJeremy L Thompson // Cleanup 112e15f9bd0SJeremy L Thompson ierr = CeedFree(&buffer); CeedChkBackend(ierr); 113ab213215SJeremy L Thompson fclose(fp); 114e15f9bd0SJeremy L Thompson ierr = CeedFree(&cuda_file); CeedChkBackend(ierr); 115e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 116241a4b83SYohann } 117241a4b83SYohann 118ab213215SJeremy L Thompson //------------------------------------------------------------------------------ 119ab213215SJeremy L Thompson // Create QFunction 120ab213215SJeremy L Thompson 
//------------------------------------------------------------------------------ 121241a4b83SYohann int CeedQFunctionCreate_Cuda_gen(CeedQFunction qf) { 122241a4b83SYohann int ierr; 123241a4b83SYohann Ceed ceed; 124241a4b83SYohann CeedQFunctionGetCeed(qf, &ceed); 125241a4b83SYohann CeedQFunction_Cuda_gen *data; 126e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 127e15f9bd0SJeremy L Thompson ierr = CeedQFunctionSetData(qf, data); CeedChkBackend(ierr); 128241a4b83SYohann 129288c0443SJeremy L Thompson char *source; 130e15f9bd0SJeremy L Thompson ierr = CeedQFunctionGetSourcePath(qf, &source); CeedChkBackend(ierr); 131503c598bSjeremylt const char *funname = strrchr(source, ':') + 1; 132241a4b83SYohann data->qFunctionName = (char *)funname; 133288c0443SJeremy L Thompson const int filenamelen = funname - source; 134241a4b83SYohann char filename[filenamelen]; 135288c0443SJeremy L Thompson memcpy(filename, source, filenamelen - 1); 136241a4b83SYohann filename[filenamelen - 1] = '\0'; 137e15f9bd0SJeremy L Thompson ierr = loadCudaFunction(qf, filename); CeedChkBackend(ierr); 138241a4b83SYohann 139241a4b83SYohann ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Apply", 140e15f9bd0SJeremy L Thompson CeedQFunctionApply_Cuda_gen); CeedChkBackend(ierr); 141241a4b83SYohann ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy", 142e15f9bd0SJeremy L Thompson CeedQFunctionDestroy_Cuda_gen); CeedChkBackend(ierr); 143e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 144241a4b83SYohann } 145ab213215SJeremy L Thompson //------------------------------------------------------------------------------ 146