xref: /libCEED/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c (revision 539ec17d7efe6a80c4ab8b3d6b91c3433981191e)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <cuda_runtime.h>
20 #include <string.h>
21 #include "ceed-cuda-ref.h"
22 
23 //------------------------------------------------------------------------------
24 // Sync host to device
25 //------------------------------------------------------------------------------
26 static inline int CeedQFunctionContextSyncH2D_Cuda(
27   const CeedQFunctionContext ctx) {
28   int ierr;
29   Ceed ceed;
30   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
31   CeedQFunctionContext_Cuda *impl;
32   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
33 
34   if (!impl->h_data)
35     // LCOV_EXCL_START
36     return CeedError(ceed, CEED_ERROR_BACKEND,
37                      "No valid host data to sync to device");
38   // LCOV_EXCL_STOP
39 
40   size_t ctxsize;
41   ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr);
42 
43   if (impl->d_data_borrowed) {
44     impl->d_data = impl->d_data_borrowed;
45   } else if (impl->d_data_owned) {
46     impl->d_data = impl->d_data_owned;
47   } else {
48     ierr = cudaMalloc((void **)&impl->d_data_owned, ctxsize);
49     CeedChk_Cu(ceed, ierr);
50     impl->d_data = impl->d_data_owned;
51   }
52 
53   ierr = cudaMemcpy(impl->d_data, impl->h_data, ctxsize,
54                     cudaMemcpyHostToDevice); CeedChk_Cu(ceed, ierr);
55 
56   return CEED_ERROR_SUCCESS;
57 }
58 
59 //------------------------------------------------------------------------------
60 // Sync device to host
61 //------------------------------------------------------------------------------
62 static inline int CeedQFunctionContextSyncD2H_Cuda(
63   const CeedQFunctionContext ctx) {
64   int ierr;
65   Ceed ceed;
66   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
67   CeedQFunctionContext_Cuda *impl;
68   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
69 
70   if (!impl->d_data)
71     // LCOV_EXCL_START
72     return CeedError(ceed, CEED_ERROR_BACKEND,
73                      "No valid device data to sync to host");
74   // LCOV_EXCL_STOP
75 
76   size_t ctxsize;
77   ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr);
78 
79   if (impl->h_data_borrowed) {
80     impl->h_data = impl->h_data_borrowed;
81   } else if (impl->h_data_owned) {
82     impl->h_data = impl->h_data_owned;
83   } else {
84     ierr = CeedMalloc(ctxsize, &impl->h_data_owned);
85     CeedChkBackend(ierr);
86     impl->h_data = impl->h_data_owned;
87   }
88 
89   ierr = cudaMemcpy(impl->h_data, impl->d_data, ctxsize,
90                     cudaMemcpyDeviceToHost); CeedChk_Cu(ceed, ierr);
91 
92   return CEED_ERROR_SUCCESS;
93 }
94 
95 //------------------------------------------------------------------------------
96 // Sync data of type
97 //------------------------------------------------------------------------------
98 static inline int CeedQFunctionContextSync_Cuda(
99   const CeedQFunctionContext ctx, CeedMemType mem_type) {
100   switch (mem_type) {
101   case CEED_MEM_HOST: return CeedQFunctionContextSyncD2H_Cuda(ctx);
102   case CEED_MEM_DEVICE: return CeedQFunctionContextSyncH2D_Cuda(ctx);
103   }
104   return CEED_ERROR_UNSUPPORTED;
105 }
106 
107 //------------------------------------------------------------------------------
108 // Set all pointers as invalid
109 //------------------------------------------------------------------------------
110 static inline int CeedQFunctionContextSetAllInvalid_Cuda(
111   const CeedQFunctionContext ctx) {
112   int ierr;
113   CeedQFunctionContext_Cuda *impl;
114   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
115 
116   impl->h_data = NULL;
117   impl->d_data = NULL;
118 
119   return CEED_ERROR_SUCCESS;
120 }
121 
122 //------------------------------------------------------------------------------
123 // Check if ctx has valid data
124 //------------------------------------------------------------------------------
125 static inline int CeedQFunctionContextHasValidData_Cuda(
126   const CeedQFunctionContext ctx, bool *has_valid_data) {
127   int ierr;
128   CeedQFunctionContext_Cuda *impl;
129   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
130 
131   *has_valid_data = !!impl->h_data || !!impl->d_data;
132 
133   return CEED_ERROR_SUCCESS;
134 }
135 
136 //------------------------------------------------------------------------------
137 // Check if ctx has borrowed data
138 //------------------------------------------------------------------------------
139 static inline int CeedQFunctionContextHasBorrowedDataOfType_Cuda(
140   const CeedQFunctionContext ctx, CeedMemType mem_type,
141   bool *has_borrowed_data_of_type) {
142   int ierr;
143   CeedQFunctionContext_Cuda *impl;
144   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
145 
146   switch (mem_type) {
147   case CEED_MEM_HOST:
148     *has_borrowed_data_of_type = !!impl->h_data_borrowed;
149     break;
150   case CEED_MEM_DEVICE:
151     *has_borrowed_data_of_type = !!impl->d_data_borrowed;
152     break;
153   }
154 
155   return CEED_ERROR_SUCCESS;
156 }
157 
158 //------------------------------------------------------------------------------
159 // Check if data of given type needs sync
160 //------------------------------------------------------------------------------
161 static inline int CeedQFunctionContextNeedSync_Cuda(
162   const CeedQFunctionContext ctx, CeedMemType mem_type, bool *need_sync) {
163   int ierr;
164   CeedQFunctionContext_Cuda *impl;
165   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
166 
167   bool has_valid_data = true;
168   ierr = CeedQFunctionContextHasValidData(ctx, &has_valid_data);
169   CeedChkBackend(ierr);
170   switch (mem_type) {
171   case CEED_MEM_HOST:
172     *need_sync = has_valid_data && !impl->h_data;
173     break;
174   case CEED_MEM_DEVICE:
175     *need_sync = has_valid_data && !impl->d_data;
176     break;
177   }
178 
179   return CEED_ERROR_SUCCESS;
180 }
181 
182 //------------------------------------------------------------------------------
183 // Set data from host
184 //------------------------------------------------------------------------------
185 static int CeedQFunctionContextSetDataHost_Cuda(const CeedQFunctionContext ctx,
186     const CeedCopyMode copy_mode, void *data) {
187   int ierr;
188   CeedQFunctionContext_Cuda *impl;
189   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
190 
191   ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr);
192   switch (copy_mode) {
193   case CEED_COPY_VALUES: {
194     size_t ctxsize;
195     ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr);
196     ierr = CeedMalloc(ctxsize, &impl->h_data_owned); CeedChkBackend(ierr);
197     impl->h_data_borrowed = NULL;
198     impl->h_data = impl->h_data_owned;
199     memcpy(impl->h_data, data, ctxsize);
200   } break;
201   case CEED_OWN_POINTER:
202     impl->h_data_owned = data;
203     impl->h_data_borrowed = NULL;
204     impl->h_data = data;
205     break;
206   case CEED_USE_POINTER:
207     impl->h_data_borrowed = data;
208     impl->h_data = data;
209     break;
210   }
211 
212   return CEED_ERROR_SUCCESS;
213 }
214 
215 //------------------------------------------------------------------------------
216 // Set data from device
217 //------------------------------------------------------------------------------
218 static int CeedQFunctionContextSetDataDevice_Cuda(
219   const CeedQFunctionContext ctx, const CeedCopyMode copy_mode, void *data) {
220   int ierr;
221   Ceed ceed;
222   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
223   CeedQFunctionContext_Cuda *impl;
224   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
225 
226   ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr);
227   impl->d_data_owned = NULL;
228   switch (copy_mode) {
229   case CEED_COPY_VALUES: {
230     size_t ctxsize;
231     ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr);
232     ierr = cudaMalloc((void **)&impl->d_data_owned, ctxsize);
233     CeedChk_Cu(ceed, ierr);
234     impl->d_data_borrowed = NULL;
235     impl->d_data = impl->d_data_owned;
236     ierr = cudaMemcpy(impl->d_data, data, ctxsize,
237                       cudaMemcpyDeviceToDevice); CeedChk_Cu(ceed, ierr);
238   } break;
239   case CEED_OWN_POINTER:
240     impl->d_data_owned = data;
241     impl->d_data_borrowed = NULL;
242     impl->d_data = data;
243     break;
244   case CEED_USE_POINTER:
245     impl->d_data_owned = NULL;
246     impl->d_data_borrowed = data;
247     impl->d_data = data;
248     break;
249   }
250 
251   return CEED_ERROR_SUCCESS;
252 }
253 
254 //------------------------------------------------------------------------------
255 // Set the data used by a user context,
256 //   freeing any previously allocated data if applicable
257 //------------------------------------------------------------------------------
258 static int CeedQFunctionContextSetData_Cuda(const CeedQFunctionContext ctx,
259     const CeedMemType mem_type, const CeedCopyMode copy_mode, void *data) {
260   int ierr;
261   Ceed ceed;
262   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
263 
264   ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr);
265   switch (mem_type) {
266   case CEED_MEM_HOST:
267     return CeedQFunctionContextSetDataHost_Cuda(ctx, copy_mode, data);
268   case CEED_MEM_DEVICE:
269     return CeedQFunctionContextSetDataDevice_Cuda(ctx, copy_mode, data);
270   }
271 
272   return CEED_ERROR_UNSUPPORTED;
273 }
274 
275 //------------------------------------------------------------------------------
276 // Take data
277 //------------------------------------------------------------------------------
278 static int CeedQFunctionContextTakeData_Cuda(const CeedQFunctionContext ctx,
279     const CeedMemType mem_type, void *data) {
280   int ierr;
281   Ceed ceed;
282   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
283   CeedQFunctionContext_Cuda *impl;
284   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
285 
286   // Sync data to requested mem_type
287   bool need_sync = false;
288   ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mem_type, &need_sync);
289   CeedChkBackend(ierr);
290   if (need_sync) {
291     ierr = CeedQFunctionContextSync_Cuda(ctx, mem_type); CeedChkBackend(ierr);
292   }
293 
294   // Update pointer
295   switch (mem_type) {
296   case CEED_MEM_HOST:
297     *(void **)data = impl->h_data_borrowed;
298     impl->h_data_borrowed = NULL;
299     impl->h_data = NULL;
300     break;
301   case CEED_MEM_DEVICE:
302     *(void **)data = impl->d_data_borrowed;
303     impl->d_data_borrowed = NULL;
304     impl->d_data = NULL;
305     break;
306   }
307 
308   return CEED_ERROR_SUCCESS;
309 }
310 
311 //------------------------------------------------------------------------------
312 // Core logic for GetData.
313 //   If a different memory type is most up to date, this will perform a copy
314 //------------------------------------------------------------------------------
315 static int CeedQFunctionContextGetDataCore_Cuda(const CeedQFunctionContext ctx,
316     const CeedMemType mem_type, void *data) {
317   int ierr;
318   Ceed ceed;
319   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
320   CeedQFunctionContext_Cuda *impl;
321   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
322 
323   // Sync data to requested mem_type
324   bool need_sync = false;
325   ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mem_type, &need_sync);
326   CeedChkBackend(ierr);
327   if (need_sync) {
328     ierr = CeedQFunctionContextSync_Cuda(ctx, mem_type); CeedChkBackend(ierr);
329   }
330 
331   // Update pointer
332   switch (mem_type) {
333   case CEED_MEM_HOST:
334     *(void **)data = impl->h_data;
335     break;
336   case CEED_MEM_DEVICE:
337     *(void **)data = impl->d_data;
338     break;
339   }
340 
341   return CEED_ERROR_SUCCESS;
342 }
343 
344 //------------------------------------------------------------------------------
345 // Get read-only access to the data
346 //------------------------------------------------------------------------------
347 static int CeedQFunctionContextGetDataRead_Cuda(const CeedQFunctionContext ctx,
348     const CeedMemType mem_type, void *data) {
349   return CeedQFunctionContextGetDataCore_Cuda(ctx, mem_type, data);
350 }
351 
352 //------------------------------------------------------------------------------
353 // Get read/write access to the data
354 //------------------------------------------------------------------------------
355 static int CeedQFunctionContextGetData_Cuda(const CeedQFunctionContext ctx,
356     const CeedMemType mem_type, void *data) {
357   int ierr;
358   CeedQFunctionContext_Cuda *impl;
359   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
360 
361   ierr = CeedQFunctionContextGetDataCore_Cuda(ctx, mem_type, data);
362   CeedChkBackend(ierr);
363 
364   // Mark only pointer for requested memory as valid
365   ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr);
366   switch (mem_type) {
367   case CEED_MEM_HOST:
368     impl->h_data = *(void **)data;
369     break;
370   case CEED_MEM_DEVICE:
371     impl->d_data = *(void **)data;
372     break;
373   }
374 
375   return CEED_ERROR_SUCCESS;
376 }
377 
378 //------------------------------------------------------------------------------
379 // Destroy the user context
380 //------------------------------------------------------------------------------
381 static int CeedQFunctionContextDestroy_Cuda(const CeedQFunctionContext ctx) {
382   int ierr;
383   Ceed ceed;
384   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
385   CeedQFunctionContext_Cuda *impl;
386   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
387 
388   ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr);
389   ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr);
390   ierr = CeedFree(&impl); CeedChkBackend(ierr);
391 
392   return CEED_ERROR_SUCCESS;
393 }
394 
395 //------------------------------------------------------------------------------
396 // QFunctionContext Create
397 //------------------------------------------------------------------------------
398 int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx) {
399   int ierr;
400   CeedQFunctionContext_Cuda *impl;
401   Ceed ceed;
402   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
403 
404   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "HasValidData",
405                                 CeedQFunctionContextHasValidData_Cuda);
406   CeedChkBackend(ierr);
407   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx,
408                                 "HasBorrowedDataOfType",
409                                 CeedQFunctionContextHasBorrowedDataOfType_Cuda);
410   CeedChkBackend(ierr);
411   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData",
412                                 CeedQFunctionContextSetData_Cuda); CeedChkBackend(ierr);
413   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData",
414                                 CeedQFunctionContextTakeData_Cuda); CeedChkBackend(ierr);
415   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetData",
416                                 CeedQFunctionContextGetData_Cuda); CeedChkBackend(ierr);
417   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetDataRead",
418                                 CeedQFunctionContextGetDataRead_Cuda); CeedChkBackend(ierr);
419   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy",
420                                 CeedQFunctionContextDestroy_Cuda); CeedChkBackend(ierr);
421 
422   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
423   ierr = CeedQFunctionContextSetBackendData(ctx, impl); CeedChkBackend(ierr);
424 
425   return CEED_ERROR_SUCCESS;
426 }
427 //------------------------------------------------------------------------------
428