// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16241a4b83SYohann 17241a4b83SYohann #include <ceed-backend.h> 18241a4b83SYohann #include <string.h> 19241a4b83SYohann #include <stdio.h> 20241a4b83SYohann #include "../cuda/ceed-cuda.h" 21241a4b83SYohann #include "ceed-cuda-gen.h" 22241a4b83SYohann 23241a4b83SYohann static int CeedQFunctionApply_Cuda_gen(CeedQFunction qf, CeedInt Q, 24241a4b83SYohann CeedVector *U, CeedVector *V) { 25241a4b83SYohann int ierr; 26241a4b83SYohann Ceed ceed; 27241a4b83SYohann ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChk(ierr); 28241a4b83SYohann return CeedError(ceed, 1, "Backend does not implement QFunctionApply"); 29241a4b83SYohann } 30241a4b83SYohann 31241a4b83SYohann static int CeedQFunctionDestroy_Cuda_gen(CeedQFunction qf) { 32241a4b83SYohann int ierr; 33241a4b83SYohann CeedQFunction_Cuda_gen *data; 34241a4b83SYohann ierr = CeedQFunctionGetData(qf, (void *)&data); CeedChk(ierr); 35241a4b83SYohann Ceed ceed; 36241a4b83SYohann ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChk(ierr); 37241a4b83SYohann 38241a4b83SYohann ierr = cudaFree(data->d_c); CeedChk_Cu(ceed, ierr); 39241a4b83SYohann 40241a4b83SYohann ierr = CeedFree(&data); CeedChk(ierr); 41241a4b83SYohann 42241a4b83SYohann return 0; 43241a4b83SYohann } 44241a4b83SYohann 45241a4b83SYohann static int loadCudaFunction(CeedQFunction qf, char *c_src_file) { 46241a4b83SYohann int ierr; 47241a4b83SYohann Ceed ceed; 48241a4b83SYohann CeedQFunctionGetCeed(qf, &ceed); 49241a4b83SYohann char *cuda_file; 50241a4b83SYohann ierr = CeedCalloc(CUDA_MAX_PATH, &cuda_file); CeedChk(ierr); 51241a4b83SYohann memcpy(cuda_file, c_src_file, strlen(c_src_file)); 52241a4b83SYohann const char *last_dot = strrchr(cuda_file, '.'); 53241a4b83SYohann if (!last_dot) 54241a4b83SYohann return CeedError(ceed, 1, "Cannot find file's extension!"); 55241a4b83SYohann const size_t cuda_path_len = last_dot - cuda_file; 56*4d537eeaSYohann strcpy(&cuda_file[cuda_path_len], ".h"); 57241a4b83SYohann //******************* 58241a4b83SYohann FILE *fp; 59241a4b83SYohann long 
lSize; 60241a4b83SYohann char *buffer; 61241a4b83SYohann 62241a4b83SYohann fp = fopen ( cuda_file, "rb" ); 63241a4b83SYohann if( !fp ) CeedError(ceed, 1, "Couldn't open the Cuda file for the QFunction."); 64241a4b83SYohann 65241a4b83SYohann fseek( fp, 0L, SEEK_END); 66241a4b83SYohann lSize = ftell( fp ); 67241a4b83SYohann rewind( fp ); 68241a4b83SYohann 69241a4b83SYohann /* allocate memory for entire content */ 70241a4b83SYohann ierr = CeedCalloc( lSize+1, &buffer ); CeedChk(ierr); 71241a4b83SYohann 72241a4b83SYohann /* copy the file into the buffer */ 73241a4b83SYohann if( 1!=fread( buffer, lSize, 1, fp) ) { 74241a4b83SYohann fclose(fp); 75241a4b83SYohann CeedFree(&buffer); 76241a4b83SYohann CeedError(ceed, 1, "Couldn't read the Cuda file for the QFunction."); 77241a4b83SYohann } 78241a4b83SYohann 79241a4b83SYohann //FIXME: the magic number 16 should be defined somewhere... 80241a4b83SYohann char *fields_string = 81241a4b83SYohann "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Cuda_gen;"; 82241a4b83SYohann char *source = (char *) malloc(1 + strlen(fields_string)+ strlen(buffer) ); 83241a4b83SYohann strcpy(source, fields_string); 84241a4b83SYohann strcat(source, buffer); 85241a4b83SYohann 86241a4b83SYohann //******************** 87241a4b83SYohann CeedQFunction_Cuda_gen *data; 88241a4b83SYohann ierr = CeedQFunctionGetData(qf, (void *)&data); CeedChk(ierr); 89241a4b83SYohann data->qFunctionSource = buffer; 90241a4b83SYohann 91241a4b83SYohann //******************** 92241a4b83SYohann fclose(fp); 93241a4b83SYohann 94241a4b83SYohann return 0; 95241a4b83SYohann } 96241a4b83SYohann 97241a4b83SYohann int CeedQFunctionCreate_Cuda_gen(CeedQFunction qf) { 98241a4b83SYohann int ierr; 99241a4b83SYohann Ceed ceed; 100241a4b83SYohann CeedQFunctionGetCeed(qf, &ceed); 101241a4b83SYohann CeedQFunction_Cuda_gen *data; 102241a4b83SYohann ierr = CeedCalloc(1,&data); CeedChk(ierr); 103241a4b83SYohann ierr = CeedQFunctionSetData(qf, (void *)&data); 
CeedChk(ierr); 104241a4b83SYohann size_t ctxsize; 105241a4b83SYohann ierr = CeedQFunctionGetContextSize(qf, &ctxsize); CeedChk(ierr); 106241a4b83SYohann ierr = cudaMalloc(&data->d_c, ctxsize); CeedChk_Cu(ceed, ierr); 107241a4b83SYohann 108241a4b83SYohann char *focca; 109241a4b83SYohann ierr = CeedQFunctionGetFOCCA(qf, &focca); CeedChk(ierr); 110241a4b83SYohann const char *funname = strrchr(focca, ':') + 1; 111241a4b83SYohann data->qFunctionName = (char *)funname; 112241a4b83SYohann const int filenamelen = funname - focca; 113241a4b83SYohann char filename[filenamelen]; 114241a4b83SYohann memcpy(filename, focca, filenamelen - 1); 115241a4b83SYohann filename[filenamelen - 1] = '\0'; 116241a4b83SYohann ierr = loadCudaFunction(qf, filename); CeedChk(ierr); 117241a4b83SYohann 118241a4b83SYohann ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Apply", 119241a4b83SYohann CeedQFunctionApply_Cuda_gen); CeedChk(ierr); 120241a4b83SYohann ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy", 121241a4b83SYohann CeedQFunctionDestroy_Cuda_gen); CeedChk(ierr); 122241a4b83SYohann return 0; 123241a4b83SYohann } 124