xref: /libCEED/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c (revision 318af0d1a760765edef6872663e190e11f82cd8e)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <cuda_runtime.h>
20 #include <string.h>
21 #include "ceed-cuda-ref.h"
22 
23 //------------------------------------------------------------------------------
24 // * Bytes used
25 //------------------------------------------------------------------------------
26 static inline size_t bytes(const CeedQFunctionContext ctx) {
27   int ierr;
28   size_t ctxsize;
29   ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr);
30   return ctxsize;
31 }
32 
33 //------------------------------------------------------------------------------
34 // Sync host to device
35 //------------------------------------------------------------------------------
36 static inline int CeedQFunctionContextSyncH2D_Cuda(
37   const CeedQFunctionContext ctx) {
38   int ierr;
39   Ceed ceed;
40   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
41   CeedQFunctionContext_Cuda *impl;
42   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
43 
44   if (!impl->h_data)
45     // LCOV_EXCL_START
46     return CeedError(ceed, CEED_ERROR_BACKEND,
47                      "No valid host data to sync to device");
48   // LCOV_EXCL_STOP
49 
50   if (impl->d_data_borrowed) {
51     impl->d_data = impl->d_data_borrowed;
52   } else if (impl->d_data_owned) {
53     impl->d_data = impl->d_data_owned;
54   } else {
55     ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx));
56     CeedChk_Cu(ceed, ierr);
57     impl->d_data = impl->d_data_owned;
58   }
59 
60   ierr = cudaMemcpy(impl->d_data, impl->h_data, bytes(ctx),
61                     cudaMemcpyHostToDevice); CeedChk_Cu(ceed, ierr);
62 
63   return CEED_ERROR_SUCCESS;
64 }
65 
66 //------------------------------------------------------------------------------
67 // Sync device to host
68 //------------------------------------------------------------------------------
69 static inline int CeedQFunctionContextSyncD2H_Cuda(
70   const CeedQFunctionContext ctx) {
71   int ierr;
72   Ceed ceed;
73   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
74   CeedQFunctionContext_Cuda *impl;
75   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
76 
77   if (!impl->d_data)
78     // LCOV_EXCL_START
79     return CeedError(ceed, CEED_ERROR_BACKEND,
80                      "No valid device data to sync to host");
81   // LCOV_EXCL_STOP
82 
83   if (impl->h_data_borrowed) {
84     impl->h_data = impl->h_data_borrowed;
85   } else if (impl->h_data_owned) {
86     impl->h_data = impl->h_data_owned;
87   } else {
88     ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned);
89     CeedChkBackend(ierr);
90     impl->h_data = impl->h_data_owned;
91   }
92 
93   ierr = cudaMemcpy(impl->h_data, impl->d_data, bytes(ctx),
94                     cudaMemcpyDeviceToHost); CeedChk_Cu(ceed, ierr);
95 
96   return CEED_ERROR_SUCCESS;
97 }
98 
99 //------------------------------------------------------------------------------
100 // Sync data of type
101 //------------------------------------------------------------------------------
102 static inline int CeedQFunctionContextSync_Cuda(
103   const CeedQFunctionContext ctx, CeedMemType mem_type) {
104   switch (mem_type) {
105   case CEED_MEM_HOST: return CeedQFunctionContextSyncD2H_Cuda(ctx);
106   case CEED_MEM_DEVICE: return CeedQFunctionContextSyncH2D_Cuda(ctx);
107   }
108   return CEED_ERROR_UNSUPPORTED;
109 }
110 
111 //------------------------------------------------------------------------------
112 // Set all pointers as invalid
113 //------------------------------------------------------------------------------
114 static inline int CeedQFunctionContextSetAllInvalid_Cuda(
115   const CeedQFunctionContext ctx) {
116   int ierr;
117   CeedQFunctionContext_Cuda *impl;
118   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
119 
120   impl->h_data = NULL;
121   impl->d_data = NULL;
122 
123   return CEED_ERROR_SUCCESS;
124 }
125 
126 //------------------------------------------------------------------------------
127 // Check if ctx has valid data
128 //------------------------------------------------------------------------------
129 static inline int CeedQFunctionContextHasValidData_Cuda(
130   const CeedQFunctionContext ctx, bool *has_valid_data) {
131   int ierr;
132   CeedQFunctionContext_Cuda *impl;
133   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
134 
135   *has_valid_data = !!impl->h_data || !!impl->d_data;
136 
137   return CEED_ERROR_SUCCESS;
138 }
139 
140 //------------------------------------------------------------------------------
141 // Check if ctx has borrowed data
142 //------------------------------------------------------------------------------
143 static inline int CeedQFunctionContextHasBorrowedDataOfType_Cuda(
144   const CeedQFunctionContext ctx, CeedMemType mem_type,
145   bool *has_borrowed_data_of_type) {
146   int ierr;
147   CeedQFunctionContext_Cuda *impl;
148   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
149 
150   switch (mem_type) {
151   case CEED_MEM_HOST:
152     *has_borrowed_data_of_type = !!impl->h_data_borrowed;
153     break;
154   case CEED_MEM_DEVICE:
155     *has_borrowed_data_of_type = !!impl->d_data_borrowed;
156     break;
157   }
158 
159   return CEED_ERROR_SUCCESS;
160 }
161 
162 //------------------------------------------------------------------------------
163 // Check if data of given type needs sync
164 //------------------------------------------------------------------------------
165 static inline int CeedQFunctionContextNeedSync_Cuda(
166   const CeedQFunctionContext ctx, CeedMemType mem_type, bool *need_sync) {
167   int ierr;
168   CeedQFunctionContext_Cuda *impl;
169   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
170 
171   bool has_valid_data = true;
172   ierr = CeedQFunctionContextHasValidData(ctx, &has_valid_data);
173   CeedChkBackend(ierr);
174   switch (mem_type) {
175   case CEED_MEM_HOST:
176     *need_sync = has_valid_data && !impl->h_data;
177     break;
178   case CEED_MEM_DEVICE:
179     *need_sync = has_valid_data && !impl->d_data;
180     break;
181   }
182 
183   return CEED_ERROR_SUCCESS;
184 }
185 
186 //------------------------------------------------------------------------------
187 // Set data from host
188 //------------------------------------------------------------------------------
189 static int CeedQFunctionContextSetDataHost_Cuda(const CeedQFunctionContext ctx,
190     const CeedCopyMode copy_mode, void *data) {
191   int ierr;
192   CeedQFunctionContext_Cuda *impl;
193   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
194 
195   ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr);
196   switch (copy_mode) {
197   case CEED_COPY_VALUES: {
198     ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); CeedChkBackend(ierr);
199     impl->h_data_borrowed = NULL;
200     impl->h_data = impl->h_data_owned;
201     memcpy(impl->h_data, data, bytes(ctx));
202   } break;
203   case CEED_OWN_POINTER:
204     impl->h_data_owned = data;
205     impl->h_data_borrowed = NULL;
206     impl->h_data = data;
207     break;
208   case CEED_USE_POINTER:
209     impl->h_data_borrowed = data;
210     impl->h_data = data;
211     break;
212   }
213 
214   return CEED_ERROR_SUCCESS;
215 }
216 
217 //------------------------------------------------------------------------------
218 // Set data from device
219 //------------------------------------------------------------------------------
220 static int CeedQFunctionContextSetDataDevice_Cuda(
221   const CeedQFunctionContext ctx, const CeedCopyMode copy_mode, void *data) {
222   int ierr;
223   Ceed ceed;
224   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
225   CeedQFunctionContext_Cuda *impl;
226   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
227 
228   ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr);
229   impl->d_data_owned = NULL;
230   switch (copy_mode) {
231   case CEED_COPY_VALUES:
232     ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx));
233     CeedChk_Cu(ceed, ierr);
234     impl->d_data_borrowed = NULL;
235     impl->d_data = impl->d_data_owned;
236     ierr = cudaMemcpy(impl->d_data, data, bytes(ctx),
237                       cudaMemcpyDeviceToDevice); CeedChk_Cu(ceed, ierr);
238     break;
239   case CEED_OWN_POINTER:
240     impl->d_data_owned = data;
241     impl->d_data_borrowed = NULL;
242     impl->d_data = data;
243     break;
244   case CEED_USE_POINTER:
245     impl->d_data_owned = NULL;
246     impl->d_data_borrowed = data;
247     impl->d_data = data;
248     break;
249   }
250 
251   return CEED_ERROR_SUCCESS;
252 }
253 
254 //------------------------------------------------------------------------------
255 // Set the data used by a user context,
256 //   freeing any previously allocated data if applicable
257 //------------------------------------------------------------------------------
258 static int CeedQFunctionContextSetData_Cuda(const CeedQFunctionContext ctx,
259     const CeedMemType mem_type, const CeedCopyMode copy_mode, void *data) {
260   int ierr;
261   Ceed ceed;
262   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
263 
264   ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr);
265   switch (mem_type) {
266   case CEED_MEM_HOST:
267     return CeedQFunctionContextSetDataHost_Cuda(ctx, copy_mode, data);
268   case CEED_MEM_DEVICE:
269     return CeedQFunctionContextSetDataDevice_Cuda(ctx, copy_mode, data);
270   }
271 
272   return CEED_ERROR_UNSUPPORTED;
273 }
274 
275 //------------------------------------------------------------------------------
276 // Take data
277 //------------------------------------------------------------------------------
278 static int CeedQFunctionContextTakeData_Cuda(const CeedQFunctionContext ctx,
279     const CeedMemType mem_type, void *data) {
280   int ierr;
281   Ceed ceed;
282   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
283   CeedQFunctionContext_Cuda *impl;
284   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
285 
286   // Sync data to requested mem_type
287   bool need_sync = false;
288   ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mem_type, &need_sync);
289   CeedChkBackend(ierr);
290   if (need_sync) {
291     ierr = CeedQFunctionContextSync_Cuda(ctx, mem_type); CeedChkBackend(ierr);
292   }
293 
294   // Update pointer
295   switch (mem_type) {
296   case CEED_MEM_HOST:
297     *(void **)data = impl->h_data_borrowed;
298     impl->h_data_borrowed = NULL;
299     impl->h_data = NULL;
300     break;
301   case CEED_MEM_DEVICE:
302     *(void **)data = impl->d_data_borrowed;
303     impl->d_data_borrowed = NULL;
304     impl->d_data = NULL;
305     break;
306   }
307 
308   return CEED_ERROR_SUCCESS;
309 }
310 
311 //------------------------------------------------------------------------------
312 // Get data
313 //------------------------------------------------------------------------------
314 static int CeedQFunctionContextGetData_Cuda(const CeedQFunctionContext ctx,
315     const CeedMemType mem_type, void *data) {
316   int ierr;
317   Ceed ceed;
318   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
319   CeedQFunctionContext_Cuda *impl;
320   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
321 
322   // Sync data to requested mem_type
323   bool need_sync = false;
324   ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mem_type, &need_sync);
325   CeedChkBackend(ierr);
326   if (need_sync) {
327     ierr = CeedQFunctionContextSync_Cuda(ctx, mem_type); CeedChkBackend(ierr);
328   }
329 
330   // Update pointer
331   switch (mem_type) {
332   case CEED_MEM_HOST:
333     *(void **)data = impl->h_data;
334     break;
335   case CEED_MEM_DEVICE:
336     *(void **)data = impl->d_data;
337     break;
338   }
339 
340   // Mark only pointer for requested memory as valid
341   ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr);
342   switch (mem_type) {
343   case CEED_MEM_HOST:
344     impl->h_data = *(void **)data;
345     break;
346   case CEED_MEM_DEVICE:
347     impl->d_data = *(void **)data;
348     break;
349   }
350 
351   return CEED_ERROR_SUCCESS;
352 }
353 
354 //------------------------------------------------------------------------------
355 // Destroy the user context
356 //------------------------------------------------------------------------------
357 static int CeedQFunctionContextDestroy_Cuda(const CeedQFunctionContext ctx) {
358   int ierr;
359   Ceed ceed;
360   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
361   CeedQFunctionContext_Cuda *impl;
362   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
363 
364   ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr);
365   ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr);
366   ierr = CeedFree(&impl); CeedChkBackend(ierr);
367 
368   return CEED_ERROR_SUCCESS;
369 }
370 
371 //------------------------------------------------------------------------------
372 // QFunctionContext Create
373 //------------------------------------------------------------------------------
374 int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx) {
375   int ierr;
376   CeedQFunctionContext_Cuda *impl;
377   Ceed ceed;
378   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
379 
380   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "HasValidData",
381                                 CeedQFunctionContextHasValidData_Cuda);
382   CeedChkBackend(ierr);
383   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx,
384                                 "HasBorrowedDataOfType",
385                                 CeedQFunctionContextHasBorrowedDataOfType_Cuda);
386   CeedChkBackend(ierr);
387   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData",
388                                 CeedQFunctionContextSetData_Cuda); CeedChkBackend(ierr);
389   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData",
390                                 CeedQFunctionContextTakeData_Cuda); CeedChkBackend(ierr);
391   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetData",
392                                 CeedQFunctionContextGetData_Cuda); CeedChkBackend(ierr);
393   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy",
394                                 CeedQFunctionContextDestroy_Cuda); CeedChkBackend(ierr);
395 
396   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
397   ierr = CeedQFunctionContextSetBackendData(ctx, impl); CeedChkBackend(ierr);
398 
399   return CEED_ERROR_SUCCESS;
400 }
401 //------------------------------------------------------------------------------
402