xref: /libCEED/rust/libceed-sys/c-src/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c (revision 28bfd0b742d5bb98b1c0887e682aa0983923c5c6)
10d0321e0SJeremy L Thompson // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
20d0321e0SJeremy L Thompson // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
30d0321e0SJeremy L Thompson // All Rights reserved. See files LICENSE and NOTICE for details.
40d0321e0SJeremy L Thompson //
50d0321e0SJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software
60d0321e0SJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral
70d0321e0SJeremy L Thompson // element discretizations for exascale applications. For more information and
80d0321e0SJeremy L Thompson // source code availability see http://github.com/ceed.
90d0321e0SJeremy L Thompson //
100d0321e0SJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
110d0321e0SJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office
120d0321e0SJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for
130d0321e0SJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including
140d0321e0SJeremy L Thompson // software, applications, hardware, advanced system engineering and early
150d0321e0SJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative.
160d0321e0SJeremy L Thompson 
170d0321e0SJeremy L Thompson #include <ceed/ceed.h>
180d0321e0SJeremy L Thompson #include <ceed/backend.h>
190d0321e0SJeremy L Thompson #include <cuda_runtime.h>
200d0321e0SJeremy L Thompson #include <string.h>
210d0321e0SJeremy L Thompson #include "ceed-cuda-ref.h"
220d0321e0SJeremy L Thompson 
230d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
240d0321e0SJeremy L Thompson // * Bytes used
250d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
260d0321e0SJeremy L Thompson static inline size_t bytes(const CeedQFunctionContext ctx) {
270d0321e0SJeremy L Thompson   int ierr;
280d0321e0SJeremy L Thompson   size_t ctxsize;
290d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr);
300d0321e0SJeremy L Thompson   return ctxsize;
310d0321e0SJeremy L Thompson }
320d0321e0SJeremy L Thompson 
330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
340d0321e0SJeremy L Thompson // Sync host to device
350d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
360d0321e0SJeremy L Thompson static inline int CeedQFunctionContextSyncH2D_Cuda(
370d0321e0SJeremy L Thompson   const CeedQFunctionContext ctx) {
380d0321e0SJeremy L Thompson   int ierr;
390d0321e0SJeremy L Thompson   Ceed ceed;
400d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
410d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
420d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
430d0321e0SJeremy L Thompson 
440d0321e0SJeremy L Thompson   if (!impl->h_data)
450d0321e0SJeremy L Thompson     // LCOV_EXCL_START
460d0321e0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
470d0321e0SJeremy L Thompson                      "No valid host data to sync to device");
480d0321e0SJeremy L Thompson   // LCOV_EXCL_STOP
490d0321e0SJeremy L Thompson 
500d0321e0SJeremy L Thompson   if (impl->d_data_borrowed) {
510d0321e0SJeremy L Thompson     impl->d_data = impl->d_data_borrowed;
520d0321e0SJeremy L Thompson   } else if (impl->d_data_owned) {
530d0321e0SJeremy L Thompson     impl->d_data = impl->d_data_owned;
540d0321e0SJeremy L Thompson   } else {
550d0321e0SJeremy L Thompson     ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx));
560d0321e0SJeremy L Thompson     CeedChk_Cu(ceed, ierr);
570d0321e0SJeremy L Thompson     impl->d_data = impl->d_data_owned;
580d0321e0SJeremy L Thompson   }
590d0321e0SJeremy L Thompson 
600d0321e0SJeremy L Thompson   ierr = cudaMemcpy(impl->d_data, impl->h_data, bytes(ctx),
610d0321e0SJeremy L Thompson                     cudaMemcpyHostToDevice); CeedChk_Cu(ceed, ierr);
620d0321e0SJeremy L Thompson 
630d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
640d0321e0SJeremy L Thompson }
650d0321e0SJeremy L Thompson 
660d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
670d0321e0SJeremy L Thompson // Sync device to host
680d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
690d0321e0SJeremy L Thompson static inline int CeedQFunctionContextSyncD2H_Cuda(
700d0321e0SJeremy L Thompson   const CeedQFunctionContext ctx) {
710d0321e0SJeremy L Thompson   int ierr;
720d0321e0SJeremy L Thompson   Ceed ceed;
730d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
740d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
750d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
760d0321e0SJeremy L Thompson 
770d0321e0SJeremy L Thompson   if (!impl->d_data)
780d0321e0SJeremy L Thompson     // LCOV_EXCL_START
790d0321e0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
800d0321e0SJeremy L Thompson                      "No valid device data to sync to host");
810d0321e0SJeremy L Thompson   // LCOV_EXCL_STOP
820d0321e0SJeremy L Thompson 
830d0321e0SJeremy L Thompson   if (impl->h_data_borrowed) {
840d0321e0SJeremy L Thompson     impl->h_data = impl->h_data_borrowed;
850d0321e0SJeremy L Thompson   } else if (impl->h_data_owned) {
860d0321e0SJeremy L Thompson     impl->h_data = impl->h_data_owned;
870d0321e0SJeremy L Thompson   } else {
880d0321e0SJeremy L Thompson     ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned);
890d0321e0SJeremy L Thompson     CeedChkBackend(ierr);
900d0321e0SJeremy L Thompson     impl->h_data = impl->h_data_owned;
910d0321e0SJeremy L Thompson   }
920d0321e0SJeremy L Thompson 
930d0321e0SJeremy L Thompson   ierr = cudaMemcpy(impl->h_data, impl->d_data, bytes(ctx),
940d0321e0SJeremy L Thompson                     cudaMemcpyDeviceToHost); CeedChk_Cu(ceed, ierr);
950d0321e0SJeremy L Thompson 
960d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
970d0321e0SJeremy L Thompson }
980d0321e0SJeremy L Thompson 
990d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1000d0321e0SJeremy L Thompson // Sync data of type
1010d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1020d0321e0SJeremy L Thompson static inline int CeedQFunctionContextSync_Cuda(
10343c928f4SJeremy L Thompson   const CeedQFunctionContext ctx, CeedMemType mem_type) {
10443c928f4SJeremy L Thompson   switch (mem_type) {
1050d0321e0SJeremy L Thompson   case CEED_MEM_HOST: return CeedQFunctionContextSyncD2H_Cuda(ctx);
1060d0321e0SJeremy L Thompson   case CEED_MEM_DEVICE: return CeedQFunctionContextSyncH2D_Cuda(ctx);
1070d0321e0SJeremy L Thompson   }
1080d0321e0SJeremy L Thompson   return CEED_ERROR_UNSUPPORTED;
1090d0321e0SJeremy L Thompson }
1100d0321e0SJeremy L Thompson 
1110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1120d0321e0SJeremy L Thompson // Set all pointers as invalid
1130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1140d0321e0SJeremy L Thompson static inline int CeedQFunctionContextSetAllInvalid_Cuda(
1150d0321e0SJeremy L Thompson   const CeedQFunctionContext ctx) {
1160d0321e0SJeremy L Thompson   int ierr;
1170d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
1180d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
1190d0321e0SJeremy L Thompson 
1200d0321e0SJeremy L Thompson   impl->h_data = NULL;
1210d0321e0SJeremy L Thompson   impl->d_data = NULL;
1220d0321e0SJeremy L Thompson 
1230d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1240d0321e0SJeremy L Thompson }
1250d0321e0SJeremy L Thompson 
1260d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1270d0321e0SJeremy L Thompson // Check if ctx has valid data
1280d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1290d0321e0SJeremy L Thompson static inline int CeedQFunctionContextHasValidData_Cuda(
1300d0321e0SJeremy L Thompson   const CeedQFunctionContext ctx, bool *has_valid_data) {
1310d0321e0SJeremy L Thompson   int ierr;
1320d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
1330d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
1340d0321e0SJeremy L Thompson 
1350d0321e0SJeremy L Thompson   *has_valid_data = !!impl->h_data || !!impl->d_data;
1360d0321e0SJeremy L Thompson 
1370d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1380d0321e0SJeremy L Thompson }
1390d0321e0SJeremy L Thompson 
1400d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1410d0321e0SJeremy L Thompson // Check if ctx has borrowed data
1420d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1430d0321e0SJeremy L Thompson static inline int CeedQFunctionContextHasBorrowedDataOfType_Cuda(
14443c928f4SJeremy L Thompson   const CeedQFunctionContext ctx, CeedMemType mem_type,
1450d0321e0SJeremy L Thompson   bool *has_borrowed_data_of_type) {
1460d0321e0SJeremy L Thompson   int ierr;
1470d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
1480d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
1490d0321e0SJeremy L Thompson 
15043c928f4SJeremy L Thompson   switch (mem_type) {
1510d0321e0SJeremy L Thompson   case CEED_MEM_HOST:
1520d0321e0SJeremy L Thompson     *has_borrowed_data_of_type = !!impl->h_data_borrowed;
1530d0321e0SJeremy L Thompson     break;
1540d0321e0SJeremy L Thompson   case CEED_MEM_DEVICE:
1550d0321e0SJeremy L Thompson     *has_borrowed_data_of_type = !!impl->d_data_borrowed;
1560d0321e0SJeremy L Thompson     break;
1570d0321e0SJeremy L Thompson   }
1580d0321e0SJeremy L Thompson 
1590d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1600d0321e0SJeremy L Thompson }
1610d0321e0SJeremy L Thompson 
1620d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1630d0321e0SJeremy L Thompson // Check if data of given type needs sync
1640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1650d0321e0SJeremy L Thompson static inline int CeedQFunctionContextNeedSync_Cuda(
16643c928f4SJeremy L Thompson   const CeedQFunctionContext ctx, CeedMemType mem_type, bool *need_sync) {
1670d0321e0SJeremy L Thompson   int ierr;
1680d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
1690d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
1700d0321e0SJeremy L Thompson 
1710d0321e0SJeremy L Thompson   bool has_valid_data = true;
1720d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextHasValidData(ctx, &has_valid_data);
1730d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
17443c928f4SJeremy L Thompson   switch (mem_type) {
1750d0321e0SJeremy L Thompson   case CEED_MEM_HOST:
1760d0321e0SJeremy L Thompson     *need_sync = has_valid_data && !impl->h_data;
1770d0321e0SJeremy L Thompson     break;
1780d0321e0SJeremy L Thompson   case CEED_MEM_DEVICE:
1790d0321e0SJeremy L Thompson     *need_sync = has_valid_data && !impl->d_data;
1800d0321e0SJeremy L Thompson     break;
1810d0321e0SJeremy L Thompson   }
1820d0321e0SJeremy L Thompson 
1830d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1840d0321e0SJeremy L Thompson }
1850d0321e0SJeremy L Thompson 
1860d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1870d0321e0SJeremy L Thompson // Set data from host
1880d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1890d0321e0SJeremy L Thompson static int CeedQFunctionContextSetDataHost_Cuda(const CeedQFunctionContext ctx,
19043c928f4SJeremy L Thompson     const CeedCopyMode copy_mode, void *data) {
1910d0321e0SJeremy L Thompson   int ierr;
1920d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
1930d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
1940d0321e0SJeremy L Thompson 
1950d0321e0SJeremy L Thompson   ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr);
19643c928f4SJeremy L Thompson   switch (copy_mode) {
1970d0321e0SJeremy L Thompson   case CEED_COPY_VALUES: {
1980d0321e0SJeremy L Thompson     ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); CeedChkBackend(ierr);
1990d0321e0SJeremy L Thompson     impl->h_data_borrowed = NULL;
2000d0321e0SJeremy L Thompson     impl->h_data = impl->h_data_owned;
2010d0321e0SJeremy L Thompson     memcpy(impl->h_data, data, bytes(ctx));
2020d0321e0SJeremy L Thompson   } break;
2030d0321e0SJeremy L Thompson   case CEED_OWN_POINTER:
2040d0321e0SJeremy L Thompson     impl->h_data_owned = data;
2050d0321e0SJeremy L Thompson     impl->h_data_borrowed = NULL;
2060d0321e0SJeremy L Thompson     impl->h_data = data;
2070d0321e0SJeremy L Thompson     break;
2080d0321e0SJeremy L Thompson   case CEED_USE_POINTER:
2090d0321e0SJeremy L Thompson     impl->h_data_borrowed = data;
2100d0321e0SJeremy L Thompson     impl->h_data = data;
2110d0321e0SJeremy L Thompson     break;
2120d0321e0SJeremy L Thompson   }
2130d0321e0SJeremy L Thompson 
2140d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2150d0321e0SJeremy L Thompson }
2160d0321e0SJeremy L Thompson 
2170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
2180d0321e0SJeremy L Thompson // Set data from device
2190d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
2200d0321e0SJeremy L Thompson static int CeedQFunctionContextSetDataDevice_Cuda(
22143c928f4SJeremy L Thompson   const CeedQFunctionContext ctx, const CeedCopyMode copy_mode, void *data) {
2220d0321e0SJeremy L Thompson   int ierr;
2230d0321e0SJeremy L Thompson   Ceed ceed;
2240d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
2250d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
2260d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
2270d0321e0SJeremy L Thompson 
2280d0321e0SJeremy L Thompson   ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr);
2290d0321e0SJeremy L Thompson   impl->d_data_owned = NULL;
23043c928f4SJeremy L Thompson   switch (copy_mode) {
2310d0321e0SJeremy L Thompson   case CEED_COPY_VALUES:
2320d0321e0SJeremy L Thompson     ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx));
2330d0321e0SJeremy L Thompson     CeedChk_Cu(ceed, ierr);
2340d0321e0SJeremy L Thompson     impl->d_data_borrowed = NULL;
2350d0321e0SJeremy L Thompson     impl->d_data = impl->d_data_owned;
2360d0321e0SJeremy L Thompson     ierr = cudaMemcpy(impl->d_data, data, bytes(ctx),
2370d0321e0SJeremy L Thompson                       cudaMemcpyDeviceToDevice); CeedChk_Cu(ceed, ierr);
2380d0321e0SJeremy L Thompson     break;
2390d0321e0SJeremy L Thompson   case CEED_OWN_POINTER:
2400d0321e0SJeremy L Thompson     impl->d_data_owned = data;
2410d0321e0SJeremy L Thompson     impl->d_data_borrowed = NULL;
2420d0321e0SJeremy L Thompson     impl->d_data = data;
2430d0321e0SJeremy L Thompson     break;
2440d0321e0SJeremy L Thompson   case CEED_USE_POINTER:
2450d0321e0SJeremy L Thompson     impl->d_data_owned = NULL;
2460d0321e0SJeremy L Thompson     impl->d_data_borrowed = data;
2470d0321e0SJeremy L Thompson     impl->d_data = data;
2480d0321e0SJeremy L Thompson     break;
2490d0321e0SJeremy L Thompson   }
2500d0321e0SJeremy L Thompson 
2510d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2520d0321e0SJeremy L Thompson }
2530d0321e0SJeremy L Thompson 
2540d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
2550d0321e0SJeremy L Thompson // Set the data used by a user context,
2560d0321e0SJeremy L Thompson //   freeing any previously allocated data if applicable
2570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
2580d0321e0SJeremy L Thompson static int CeedQFunctionContextSetData_Cuda(const CeedQFunctionContext ctx,
25943c928f4SJeremy L Thompson     const CeedMemType mem_type, const CeedCopyMode copy_mode, void *data) {
2600d0321e0SJeremy L Thompson   int ierr;
2610d0321e0SJeremy L Thompson   Ceed ceed;
2620d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
2630d0321e0SJeremy L Thompson 
2640d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr);
26543c928f4SJeremy L Thompson   switch (mem_type) {
2660d0321e0SJeremy L Thompson   case CEED_MEM_HOST:
26743c928f4SJeremy L Thompson     return CeedQFunctionContextSetDataHost_Cuda(ctx, copy_mode, data);
2680d0321e0SJeremy L Thompson   case CEED_MEM_DEVICE:
26943c928f4SJeremy L Thompson     return CeedQFunctionContextSetDataDevice_Cuda(ctx, copy_mode, data);
2700d0321e0SJeremy L Thompson   }
2710d0321e0SJeremy L Thompson 
2720d0321e0SJeremy L Thompson   return CEED_ERROR_UNSUPPORTED;
2730d0321e0SJeremy L Thompson }
2740d0321e0SJeremy L Thompson 
2750d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
2760d0321e0SJeremy L Thompson // Take data
2770d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
2780d0321e0SJeremy L Thompson static int CeedQFunctionContextTakeData_Cuda(const CeedQFunctionContext ctx,
27943c928f4SJeremy L Thompson     const CeedMemType mem_type, void *data) {
2800d0321e0SJeremy L Thompson   int ierr;
2810d0321e0SJeremy L Thompson   Ceed ceed;
2820d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
2830d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
2840d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
2850d0321e0SJeremy L Thompson 
28643c928f4SJeremy L Thompson   // Sync data to requested mem_type
2870d0321e0SJeremy L Thompson   bool need_sync = false;
28843c928f4SJeremy L Thompson   ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mem_type, &need_sync);
2890d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
2900d0321e0SJeremy L Thompson   if (need_sync) {
29143c928f4SJeremy L Thompson     ierr = CeedQFunctionContextSync_Cuda(ctx, mem_type); CeedChkBackend(ierr);
2920d0321e0SJeremy L Thompson   }
2930d0321e0SJeremy L Thompson 
2940d0321e0SJeremy L Thompson   // Update pointer
29543c928f4SJeremy L Thompson   switch (mem_type) {
2960d0321e0SJeremy L Thompson   case CEED_MEM_HOST:
2970d0321e0SJeremy L Thompson     *(void **)data = impl->h_data_borrowed;
2980d0321e0SJeremy L Thompson     impl->h_data_borrowed = NULL;
2990d0321e0SJeremy L Thompson     impl->h_data = NULL;
3000d0321e0SJeremy L Thompson     break;
3010d0321e0SJeremy L Thompson   case CEED_MEM_DEVICE:
3020d0321e0SJeremy L Thompson     *(void **)data = impl->d_data_borrowed;
3030d0321e0SJeremy L Thompson     impl->d_data_borrowed = NULL;
3040d0321e0SJeremy L Thompson     impl->d_data = NULL;
3050d0321e0SJeremy L Thompson     break;
3060d0321e0SJeremy L Thompson   }
3070d0321e0SJeremy L Thompson 
3080d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3090d0321e0SJeremy L Thompson }
3100d0321e0SJeremy L Thompson 
3110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
312*28bfd0b7SJeremy L Thompson // Core logic for GetData.
313*28bfd0b7SJeremy L Thompson //   If a different memory type is most up to date, this will perform a copy
3140d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
315*28bfd0b7SJeremy L Thompson static int CeedQFunctionContextGetDataCore_Cuda(const CeedQFunctionContext ctx,
31643c928f4SJeremy L Thompson     const CeedMemType mem_type, void *data) {
3170d0321e0SJeremy L Thompson   int ierr;
3180d0321e0SJeremy L Thompson   Ceed ceed;
3190d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
3200d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
3210d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
3220d0321e0SJeremy L Thompson 
32343c928f4SJeremy L Thompson   // Sync data to requested mem_type
3240d0321e0SJeremy L Thompson   bool need_sync = false;
32543c928f4SJeremy L Thompson   ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mem_type, &need_sync);
3260d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
3270d0321e0SJeremy L Thompson   if (need_sync) {
32843c928f4SJeremy L Thompson     ierr = CeedQFunctionContextSync_Cuda(ctx, mem_type); CeedChkBackend(ierr);
3290d0321e0SJeremy L Thompson   }
3300d0321e0SJeremy L Thompson 
3310d0321e0SJeremy L Thompson   // Update pointer
33243c928f4SJeremy L Thompson   switch (mem_type) {
3330d0321e0SJeremy L Thompson   case CEED_MEM_HOST:
3340d0321e0SJeremy L Thompson     *(void **)data = impl->h_data;
3350d0321e0SJeremy L Thompson     break;
3360d0321e0SJeremy L Thompson   case CEED_MEM_DEVICE:
3370d0321e0SJeremy L Thompson     *(void **)data = impl->d_data;
3380d0321e0SJeremy L Thompson     break;
3390d0321e0SJeremy L Thompson   }
3400d0321e0SJeremy L Thompson 
341*28bfd0b7SJeremy L Thompson   return CEED_ERROR_SUCCESS;
342*28bfd0b7SJeremy L Thompson }
343*28bfd0b7SJeremy L Thompson 
344*28bfd0b7SJeremy L Thompson //------------------------------------------------------------------------------
345*28bfd0b7SJeremy L Thompson // Get read-only access to the data
346*28bfd0b7SJeremy L Thompson //------------------------------------------------------------------------------
347*28bfd0b7SJeremy L Thompson static int CeedQFunctionContextGetDataRead_Cuda(const CeedQFunctionContext ctx,
348*28bfd0b7SJeremy L Thompson     const CeedMemType mem_type, void *data) {
349*28bfd0b7SJeremy L Thompson   return CeedQFunctionContextGetDataCore_Cuda(ctx, mem_type, data);
350*28bfd0b7SJeremy L Thompson }
351*28bfd0b7SJeremy L Thompson 
352*28bfd0b7SJeremy L Thompson //------------------------------------------------------------------------------
353*28bfd0b7SJeremy L Thompson // Get read/write access to the data
354*28bfd0b7SJeremy L Thompson //------------------------------------------------------------------------------
355*28bfd0b7SJeremy L Thompson static int CeedQFunctionContextGetData_Cuda(const CeedQFunctionContext ctx,
356*28bfd0b7SJeremy L Thompson     const CeedMemType mem_type, void *data) {
357*28bfd0b7SJeremy L Thompson   int ierr;
358*28bfd0b7SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
359*28bfd0b7SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
360*28bfd0b7SJeremy L Thompson 
361*28bfd0b7SJeremy L Thompson   ierr = CeedQFunctionContextGetDataCore_Cuda(ctx, mem_type, data);
362*28bfd0b7SJeremy L Thompson   CeedChkBackend(ierr);
363*28bfd0b7SJeremy L Thompson 
3640d0321e0SJeremy L Thompson   // Mark only pointer for requested memory as valid
3650d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr);
36643c928f4SJeremy L Thompson   switch (mem_type) {
3670d0321e0SJeremy L Thompson   case CEED_MEM_HOST:
3680d0321e0SJeremy L Thompson     impl->h_data = *(void **)data;
3690d0321e0SJeremy L Thompson     break;
3700d0321e0SJeremy L Thompson   case CEED_MEM_DEVICE:
3710d0321e0SJeremy L Thompson     impl->d_data = *(void **)data;
3720d0321e0SJeremy L Thompson     break;
3730d0321e0SJeremy L Thompson   }
3740d0321e0SJeremy L Thompson 
3750d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3760d0321e0SJeremy L Thompson }
3770d0321e0SJeremy L Thompson 
3780d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3790d0321e0SJeremy L Thompson // Destroy the user context
3800d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3810d0321e0SJeremy L Thompson static int CeedQFunctionContextDestroy_Cuda(const CeedQFunctionContext ctx) {
3820d0321e0SJeremy L Thompson   int ierr;
3830d0321e0SJeremy L Thompson   Ceed ceed;
3840d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
3850d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
3860d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
3870d0321e0SJeremy L Thompson 
3880d0321e0SJeremy L Thompson   ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr);
3890d0321e0SJeremy L Thompson   ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr);
3900d0321e0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
3910d0321e0SJeremy L Thompson 
3920d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3930d0321e0SJeremy L Thompson }
3940d0321e0SJeremy L Thompson 
3950d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3960d0321e0SJeremy L Thompson // QFunctionContext Create
3970d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3980d0321e0SJeremy L Thompson int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx) {
3990d0321e0SJeremy L Thompson   int ierr;
4000d0321e0SJeremy L Thompson   CeedQFunctionContext_Cuda *impl;
4010d0321e0SJeremy L Thompson   Ceed ceed;
4020d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
4030d0321e0SJeremy L Thompson 
4040d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "HasValidData",
4050d0321e0SJeremy L Thompson                                 CeedQFunctionContextHasValidData_Cuda);
4060d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
4070d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx,
4080d0321e0SJeremy L Thompson                                 "HasBorrowedDataOfType",
4090d0321e0SJeremy L Thompson                                 CeedQFunctionContextHasBorrowedDataOfType_Cuda);
4100d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
4110d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData",
4120d0321e0SJeremy L Thompson                                 CeedQFunctionContextSetData_Cuda); CeedChkBackend(ierr);
4130d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData",
4140d0321e0SJeremy L Thompson                                 CeedQFunctionContextTakeData_Cuda); CeedChkBackend(ierr);
4150d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetData",
4160d0321e0SJeremy L Thompson                                 CeedQFunctionContextGetData_Cuda); CeedChkBackend(ierr);
417*28bfd0b7SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetDataRead",
418*28bfd0b7SJeremy L Thompson                                 CeedQFunctionContextGetDataRead_Cuda); CeedChkBackend(ierr);
4190d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy",
4200d0321e0SJeremy L Thompson                                 CeedQFunctionContextDestroy_Cuda); CeedChkBackend(ierr);
4210d0321e0SJeremy L Thompson 
4220d0321e0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
4230d0321e0SJeremy L Thompson   ierr = CeedQFunctionContextSetBackendData(ctx, impl); CeedChkBackend(ierr);
4240d0321e0SJeremy L Thompson 
4250d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4260d0321e0SJeremy L Thompson }
4270d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
428