xref: /libCEED/backends/cuda-gen/ceed-cuda-gen-qfunction.c (revision 6bbcfef4b418ab7c854cd1f968d28b3250f9c19e)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <string.h>
18 #include <stdio.h>
19 #include "ceed-cuda-gen.h"
20 
21 //------------------------------------------------------------------------------
22 // Apply QFunction
23 //------------------------------------------------------------------------------
24 static int CeedQFunctionApply_Cuda_gen(CeedQFunction qf, CeedInt Q,
25                                        CeedVector *U, CeedVector *V) {
26   int ierr;
27   Ceed ceed;
28   ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChk(ierr);
29   return CeedError(ceed, 1, "Backend does not implement QFunctionApply");
30 }
31 
32 //------------------------------------------------------------------------------
33 // Destroy QFunction
34 //------------------------------------------------------------------------------
35 static int CeedQFunctionDestroy_Cuda_gen(CeedQFunction qf) {
36   int ierr;
37   CeedQFunction_Cuda_gen *data;
38   ierr = CeedQFunctionGetData(qf, (void *)&data); CeedChk(ierr);
39   Ceed ceed;
40   ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChk(ierr);
41   ierr = cudaFree(data->d_c); CeedChk_Cu(ceed, ierr);
42   ierr = CeedFree(&data->qFunctionSource); CeedChk(ierr);
43   ierr = CeedFree(&data); CeedChk(ierr);
44   return 0;
45 }
46 
47 //------------------------------------------------------------------------------
48 // Load QFunction
49 //------------------------------------------------------------------------------
50 static int loadCudaFunction(CeedQFunction qf, char *c_src_file) {
51   int ierr;
52   Ceed ceed;
53   CeedQFunctionGetCeed(qf, &ceed);
54   CeedQFunction_Cuda_gen *data;
55   ierr = CeedQFunctionGetData(qf, (void *)&data); CeedChk(ierr);
56 
57   // Find source file
58   char *cuda_file;
59   ierr = CeedCalloc(CUDA_MAX_PATH, &cuda_file); CeedChk(ierr);
60   memcpy(cuda_file, c_src_file, strlen(c_src_file));
61   const char *last_dot = strrchr(cuda_file, '.');
62   if (!last_dot)
63     return CeedError(ceed, 1, "Cannot find file's extension!");
64   const size_t cuda_path_len = last_dot - cuda_file;
65   strncpy(&cuda_file[cuda_path_len], ".h", 3);
66 
67   // Open source file
68   FILE *fp;
69   long lSize;
70   char *buffer;
71   fp = fopen (cuda_file, "rb");
72   if (!fp)
73     // LCOV_EXCL_START
74     CeedError(ceed, 1, "Couldn't open the Cuda file for the QFunction.");
75   // LCOV_EXCL_STOP
76 
77   // Compute size of source file
78   fseek(fp, 0L, SEEK_END);
79   lSize = ftell(fp);
80   rewind(fp);
81 
82   // Allocate memory for entire content
83   ierr = CeedCalloc(lSize+1, &buffer); CeedChk(ierr);
84 
85   // Copy the file into the buffer
86   if (1 != fread(buffer, lSize, 1, fp)) {
87     // LCOV_EXCL_START
88     fclose(fp);
89     ierr = CeedFree(&buffer); CeedChk(ierr);
90     CeedError(ceed, 1, "Couldn't read the Cuda file for the QFunction.");
91     // LCOV_EXCL_STOP
92   }
93 
94   // Append typedef and save source string
95   // FIXME: the magic number 16 should be defined somewhere...
96   char *fields_string =
97     "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Cuda_gen;";
98   ierr = CeedMalloc(1 + strlen(fields_string) + strlen(buffer),
99                     &data->qFunctionSource); CeedChk(ierr);
100   memcpy(data->qFunctionSource, fields_string, strlen(fields_string));
101   memcpy(data->qFunctionSource + strlen(fields_string), buffer,
102          strlen(buffer) + 1);
103 
104   // Cleanup
105   ierr = CeedFree(&buffer); CeedChk(ierr);
106   fclose(fp);
107   ierr = CeedFree(&cuda_file); CeedChk(ierr);
108   return 0;
109 }
110 
111 //------------------------------------------------------------------------------
112 // Create QFunction
113 //------------------------------------------------------------------------------
114 int CeedQFunctionCreate_Cuda_gen(CeedQFunction qf) {
115   int ierr;
116   Ceed ceed;
117   CeedQFunctionGetCeed(qf, &ceed);
118   CeedQFunction_Cuda_gen *data;
119   ierr = CeedCalloc(1,&data); CeedChk(ierr);
120   ierr = CeedQFunctionSetData(qf, (void *)&data); CeedChk(ierr);
121   size_t ctxsize;
122   ierr = CeedQFunctionGetContextSize(qf, &ctxsize); CeedChk(ierr);
123   ierr = cudaMalloc(&data->d_c, ctxsize); CeedChk_Cu(ceed, ierr);
124 
125   char *source;
126   ierr = CeedQFunctionGetSourcePath(qf, &source); CeedChk(ierr);
127   const char *funname = strrchr(source, ':') + 1;
128   data->qFunctionName = (char *)funname;
129   const int filenamelen = funname - source;
130   char filename[filenamelen];
131   memcpy(filename, source, filenamelen - 1);
132   filename[filenamelen - 1] = '\0';
133   ierr = loadCudaFunction(qf, filename); CeedChk(ierr);
134 
135   ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Apply",
136                                 CeedQFunctionApply_Cuda_gen); CeedChk(ierr);
137   ierr = CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy",
138                                 CeedQFunctionDestroy_Cuda_gen); CeedChk(ierr);
139   return 0;
140 }
141 //------------------------------------------------------------------------------
142