1*241a4b83SYohann // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2*241a4b83SYohann // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3*241a4b83SYohann // All Rights reserved. See files LICENSE and NOTICE for details. 4*241a4b83SYohann // 5*241a4b83SYohann // This file is part of CEED, a collection of benchmarks, miniapps, software 6*241a4b83SYohann // libraries and APIs for efficient high-order finite element and spectral 7*241a4b83SYohann // element discretizations for exascale applications. For more information and 8*241a4b83SYohann // source code availability see http://github.com/ceed. 9*241a4b83SYohann // 10*241a4b83SYohann // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11*241a4b83SYohann // a collaborative effort of two U.S. Department of Energy organizations (Office 12*241a4b83SYohann // of Science and the National Nuclear Security Administration) responsible for 13*241a4b83SYohann // the planning and preparation of a capable exascale ecosystem, including 14*241a4b83SYohann // software, applications, hardware, advanced system engineering and early 15*241a4b83SYohann // testbed platforms, in support of the nation's exascale computing imperative. 16*241a4b83SYohann 17*241a4b83SYohann #include <ceed-backend.h> 18*241a4b83SYohann #include <string.h> 19*241a4b83SYohann #include <stdio.h> 20*241a4b83SYohann #include "../cuda/ceed-cuda.h" 21*241a4b83SYohann #include "ceed-cuda-gen.h" 22*241a4b83SYohann 23*241a4b83SYohann static int CeedQFunctionApply_Cuda_gen(CeedQFunction qf, CeedInt Q, 24*241a4b83SYohann CeedVector *U, CeedVector *V) { 25*241a4b83SYohann int ierr; 26*241a4b83SYohann Ceed ceed; 27*241a4b83SYohann ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChk(ierr); 28*241a4b83SYohann return CeedError(ceed, 1, "Backend does not implement QFunctionApply"); 29*241a4b83SYohann } 30*241a4b83SYohann 31*241a4b83SYohann static int CeedQFunctionDestroy_Cuda_gen(CeedQFunction qf) { 32*241a4b83SYohann int ierr; 33*241a4b83SYohann CeedQFunction_Cuda_gen *data; 34*241a4b83SYohann ierr = CeedQFunctionGetData(qf, (void *)&data); CeedChk(ierr); 35*241a4b83SYohann Ceed ceed; 36*241a4b83SYohann ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChk(ierr); 37*241a4b83SYohann 38*241a4b83SYohann ierr = cudaFree(data->d_c); CeedChk_Cu(ceed, ierr); 39*241a4b83SYohann 40*241a4b83SYohann ierr = CeedFree(&data); CeedChk(ierr); 41*241a4b83SYohann 42*241a4b83SYohann return 0; 43*241a4b83SYohann } 44*241a4b83SYohann 45*241a4b83SYohann static int loadCudaFunction(CeedQFunction qf, char *c_src_file) { 46*241a4b83SYohann int ierr; 47*241a4b83SYohann Ceed ceed; 48*241a4b83SYohann CeedQFunctionGetCeed(qf, &ceed); 49*241a4b83SYohann char *cuda_file; 50*241a4b83SYohann ierr = CeedCalloc(CUDA_MAX_PATH, &cuda_file); CeedChk(ierr); 51*241a4b83SYohann memcpy(cuda_file, c_src_file, strlen(c_src_file)); 52*241a4b83SYohann const char *last_dot = strrchr(cuda_file, '.'); 53*241a4b83SYohann if (!last_dot) 54*241a4b83SYohann return CeedError(ceed, 1, "Cannot find file's extension!"); 55*241a4b83SYohann const size_t cuda_path_len = last_dot - cuda_file; 56*241a4b83SYohann strcpy(&cuda_file[cuda_path_len], ".qf"); 57*241a4b83SYohann //******************* 58*241a4b83SYohann FILE *fp; 59*241a4b83SYohann long lSize; 60*241a4b83SYohann char *buffer; 61*241a4b83SYohann 62*241a4b83SYohann fp = fopen ( cuda_file, "rb" ); 63*241a4b83SYohann if( !fp ) CeedError(ceed, 1, "Couldn't open the Cuda file for the QFunction."); 64*241a4b83SYohann 65*241a4b83SYohann fseek( fp, 0L, SEEK_END); 66*241a4b83SYohann lSize = ftell( fp ); 67*241a4b83SYohann rewind( fp ); 68*241a4b83SYohann 69*241a4b83SYohann /* allocate memory for entire content */ 70*241a4b83SYohann ierr = CeedCalloc( lSize+1, &buffer ); CeedChk(ierr); 71*241a4b83SYohann 72*241a4b83SYohann /* copy the file into the buffer */ 73*241a4b83SYohann if( 1!=fread( buffer, lSize, 1, fp) ) { 74*241a4b83SYohann fclose(fp); 75*241a4b83SYohann CeedFree(&buffer); 76*241a4b83SYohann CeedError(ceed, 1, "Couldn't read the Cuda file for the QFunction."); 77*241a4b83SYohann } 78*241a4b83SYohann 79*241a4b83SYohann //FIXME: the magic number 16 should be defined somewhere... 80*241a4b83SYohann char *fields_string = 81*241a4b83SYohann "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Cuda_gen;"; 82*241a4b83SYohann char *source = (char *) malloc(1 + strlen(fields_string)+ strlen(buffer) ); 83*241a4b83SYohann strcpy(source, fields_string); 84*241a4b83SYohann strcat(source, buffer); 85*241a4b83SYohann 86*241a4b83SYohann //******************** 87*241a4b83SYohann CeedQFunction_Cuda_gen *data; 88*241a4b83SYohann ierr = CeedQFunctionGetData(qf, (void *)&data); CeedChk(ierr); 89*241a4b83SYohann data->qFunctionSource = buffer; 90*241a4b83SYohann 91*241a4b83SYohann //******************** 92*241a4b83SYohann fclose(fp); 93*241a4b83SYohann 94*241a4b83SYohann return 0; 95*241a4b83SYohann } 96*241a4b83SYohann 97*241a4b83SYohann int CeedQFunctionCreate_Cuda_gen(CeedQFunction qf) { 98*241a4b83SYohann int ierr; 99*241a4b83SYohann Ceed ceed; 100*241a4b83SYohann CeedQFunctionGetCeed(qf, &ceed); 101*241a4b83SYohann CeedQFunction_Cuda_gen *data; 102*241a4b83SYohann ierr = CeedCalloc(1,&data); CeedChk(ierr); 103*241a4b83SYohann ierr = CeedQFunctionSetData(qf, (void *)&data); CeedChk(ierr); 104*241a4b83SYohann // CeedInt numinputfields, numoutputfields; 105*241a4b83SYohann // ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 106*241a4b83SYohann size_t ctxsize; 107*241a4b83SYohann ierr = CeedQFunctionGetContextSize(qf, &ctxsize); CeedChk(ierr); 108*241a4b83SYohann ierr = cudaMalloc(&data->d_c, ctxsize); CeedChk_Cu(ceed, ierr); 109*241a4b83SYohann 110*241a4b83SYohann char *focca; 111*241a4b83SYohann ierr = CeedQFunctionGetFOCCA(qf, &focca); CeedChk(ierr); 112*241a4b83SYohann const char *funname = strrchr(focca, ':') + 1; 113*241a4b83SYohann data->qFunctionName = (char *)funname; 114*241a4b83SYohann const int filenamelen = funname - focca; 115*241a4b83SYohann char filename[filenamelen]; 116*241a4b83SYohann memcpy(filename, focca, filenamelen - 1); 117*241a4b83SYohann filename[filenamelen - 1] = '\0'; 118*241a4b83SYohann ierr = loadCudaFunction(qf, filename); CeedChk(ierr); 119*241a4b83SYohann 120*241a4b83SYohann ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Apply", 121*241a4b83SYohann CeedQFunctionApply_Cuda_gen); CeedChk(ierr); 122*241a4b83SYohann ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy", 123*241a4b83SYohann CeedQFunctionDestroy_Cuda_gen); CeedChk(ierr); 124*241a4b83SYohann return 0; 125*241a4b83SYohann } 126