10d0321e0SJeremy L Thompson // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 20d0321e0SJeremy L Thompson // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 30d0321e0SJeremy L Thompson // All Rights reserved. See files LICENSE and NOTICE for details. 40d0321e0SJeremy L Thompson // 50d0321e0SJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software 60d0321e0SJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral 70d0321e0SJeremy L Thompson // element discretizations for exascale applications. For more information and 80d0321e0SJeremy L Thompson // source code availability see http://github.com/ceed. 90d0321e0SJeremy L Thompson // 100d0321e0SJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 110d0321e0SJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office 120d0321e0SJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for 130d0321e0SJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including 140d0321e0SJeremy L Thompson // software, applications, hardware, advanced system engineering and early 150d0321e0SJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative. 160d0321e0SJeremy L Thompson 170d0321e0SJeremy L Thompson #include <ceed/ceed.h> 180d0321e0SJeremy L Thompson #include <ceed/backend.h> 190d0321e0SJeremy L Thompson #include <cuda_runtime.h> 200d0321e0SJeremy L Thompson #include <string.h> 210d0321e0SJeremy L Thompson #include "ceed-cuda-ref.h" 220d0321e0SJeremy L Thompson 230d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 240d0321e0SJeremy L Thompson // * Bytes used 250d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 260d0321e0SJeremy L Thompson static inline size_t bytes(const CeedQFunctionContext ctx) { 270d0321e0SJeremy L Thompson int ierr; 280d0321e0SJeremy L Thompson size_t ctxsize; 290d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr); 300d0321e0SJeremy L Thompson return ctxsize; 310d0321e0SJeremy L Thompson } 320d0321e0SJeremy L Thompson 330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 340d0321e0SJeremy L Thompson // Sync host to device 350d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 360d0321e0SJeremy L Thompson static inline int CeedQFunctionContextSyncH2D_Cuda( 370d0321e0SJeremy L Thompson const CeedQFunctionContext ctx) { 380d0321e0SJeremy L Thompson int ierr; 390d0321e0SJeremy L Thompson Ceed ceed; 400d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 410d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 420d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 430d0321e0SJeremy L Thompson 440d0321e0SJeremy L Thompson if (!impl->h_data) 450d0321e0SJeremy L Thompson // LCOV_EXCL_START 460d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 470d0321e0SJeremy L Thompson "No valid host data to sync to device"); 480d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 490d0321e0SJeremy L Thompson 500d0321e0SJeremy L Thompson if (impl->d_data_borrowed) { 510d0321e0SJeremy L Thompson impl->d_data = impl->d_data_borrowed; 520d0321e0SJeremy L Thompson } else if (impl->d_data_owned) { 530d0321e0SJeremy L Thompson impl->d_data = impl->d_data_owned; 540d0321e0SJeremy L Thompson } else { 550d0321e0SJeremy L Thompson ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx)); 560d0321e0SJeremy L Thompson CeedChk_Cu(ceed, ierr); 570d0321e0SJeremy L Thompson impl->d_data = impl->d_data_owned; 580d0321e0SJeremy L Thompson } 590d0321e0SJeremy L Thompson 600d0321e0SJeremy L Thompson ierr = cudaMemcpy(impl->d_data, impl->h_data, bytes(ctx), 610d0321e0SJeremy L Thompson cudaMemcpyHostToDevice); CeedChk_Cu(ceed, ierr); 620d0321e0SJeremy L Thompson 630d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 640d0321e0SJeremy L Thompson } 650d0321e0SJeremy L Thompson 660d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 670d0321e0SJeremy L Thompson // Sync device to host 680d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 690d0321e0SJeremy L Thompson static inline int CeedQFunctionContextSyncD2H_Cuda( 700d0321e0SJeremy L Thompson const CeedQFunctionContext ctx) { 710d0321e0SJeremy L Thompson int ierr; 720d0321e0SJeremy L Thompson Ceed ceed; 730d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 740d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 750d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 760d0321e0SJeremy L Thompson 770d0321e0SJeremy L Thompson if (!impl->d_data) 780d0321e0SJeremy L Thompson // LCOV_EXCL_START 790d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 800d0321e0SJeremy L Thompson "No valid device data to sync to host"); 810d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 820d0321e0SJeremy L Thompson 830d0321e0SJeremy L Thompson if (impl->h_data_borrowed) { 840d0321e0SJeremy L Thompson impl->h_data = impl->h_data_borrowed; 850d0321e0SJeremy L Thompson } else if (impl->h_data_owned) { 860d0321e0SJeremy L Thompson impl->h_data = impl->h_data_owned; 870d0321e0SJeremy L Thompson } else { 880d0321e0SJeremy L Thompson ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); 890d0321e0SJeremy L Thompson CeedChkBackend(ierr); 900d0321e0SJeremy L Thompson impl->h_data = impl->h_data_owned; 910d0321e0SJeremy L Thompson } 920d0321e0SJeremy L Thompson 930d0321e0SJeremy L Thompson ierr = cudaMemcpy(impl->h_data, impl->d_data, bytes(ctx), 940d0321e0SJeremy L Thompson cudaMemcpyDeviceToHost); CeedChk_Cu(ceed, ierr); 950d0321e0SJeremy L Thompson 960d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 970d0321e0SJeremy L Thompson } 980d0321e0SJeremy L Thompson 990d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1000d0321e0SJeremy L Thompson // Sync data of type 1010d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1020d0321e0SJeremy L Thompson static inline int CeedQFunctionContextSync_Cuda( 10343c928f4SJeremy L Thompson const CeedQFunctionContext ctx, CeedMemType mem_type) { 10443c928f4SJeremy L Thompson switch (mem_type) { 1050d0321e0SJeremy L Thompson case CEED_MEM_HOST: return CeedQFunctionContextSyncD2H_Cuda(ctx); 1060d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: return CeedQFunctionContextSyncH2D_Cuda(ctx); 1070d0321e0SJeremy L Thompson } 1080d0321e0SJeremy L Thompson return CEED_ERROR_UNSUPPORTED; 1090d0321e0SJeremy L Thompson } 1100d0321e0SJeremy L Thompson 1110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1120d0321e0SJeremy L Thompson // Set all pointers as invalid 1130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1140d0321e0SJeremy L Thompson static inline int CeedQFunctionContextSetAllInvalid_Cuda( 1150d0321e0SJeremy L Thompson const CeedQFunctionContext ctx) { 1160d0321e0SJeremy L Thompson int ierr; 1170d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 1180d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 1190d0321e0SJeremy L Thompson 1200d0321e0SJeremy L Thompson impl->h_data = NULL; 1210d0321e0SJeremy L Thompson impl->d_data = NULL; 1220d0321e0SJeremy L Thompson 1230d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1240d0321e0SJeremy L Thompson } 1250d0321e0SJeremy L Thompson 1260d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1270d0321e0SJeremy L Thompson // Check if ctx has valid data 1280d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1290d0321e0SJeremy L Thompson static inline int CeedQFunctionContextHasValidData_Cuda( 1300d0321e0SJeremy L Thompson const CeedQFunctionContext ctx, bool *has_valid_data) { 1310d0321e0SJeremy L Thompson int ierr; 1320d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 1330d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 1340d0321e0SJeremy L Thompson 1350d0321e0SJeremy L Thompson *has_valid_data = !!impl->h_data || !!impl->d_data; 1360d0321e0SJeremy L Thompson 1370d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1380d0321e0SJeremy L Thompson } 1390d0321e0SJeremy L Thompson 1400d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1410d0321e0SJeremy L Thompson // Check if ctx has borrowed data 1420d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1430d0321e0SJeremy L Thompson static inline int CeedQFunctionContextHasBorrowedDataOfType_Cuda( 14443c928f4SJeremy L Thompson const CeedQFunctionContext ctx, CeedMemType mem_type, 1450d0321e0SJeremy L Thompson bool *has_borrowed_data_of_type) { 1460d0321e0SJeremy L Thompson int ierr; 1470d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 1480d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 1490d0321e0SJeremy L Thompson 15043c928f4SJeremy L Thompson switch (mem_type) { 1510d0321e0SJeremy L Thompson case CEED_MEM_HOST: 1520d0321e0SJeremy L Thompson *has_borrowed_data_of_type = !!impl->h_data_borrowed; 1530d0321e0SJeremy L Thompson break; 1540d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 1550d0321e0SJeremy L Thompson *has_borrowed_data_of_type = !!impl->d_data_borrowed; 1560d0321e0SJeremy L Thompson break; 1570d0321e0SJeremy L Thompson } 1580d0321e0SJeremy L Thompson 1590d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1600d0321e0SJeremy L Thompson } 1610d0321e0SJeremy L Thompson 1620d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1630d0321e0SJeremy L Thompson // Check if data of given type needs sync 1640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1650d0321e0SJeremy L Thompson static inline int CeedQFunctionContextNeedSync_Cuda( 16643c928f4SJeremy L Thompson const CeedQFunctionContext ctx, CeedMemType mem_type, bool *need_sync) { 1670d0321e0SJeremy L Thompson int ierr; 1680d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 1690d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 1700d0321e0SJeremy L Thompson 1710d0321e0SJeremy L Thompson bool has_valid_data = true; 1720d0321e0SJeremy L Thompson ierr = CeedQFunctionContextHasValidData(ctx, &has_valid_data); 1730d0321e0SJeremy L Thompson CeedChkBackend(ierr); 17443c928f4SJeremy L Thompson switch (mem_type) { 1750d0321e0SJeremy L Thompson case CEED_MEM_HOST: 1760d0321e0SJeremy L Thompson *need_sync = has_valid_data && !impl->h_data; 1770d0321e0SJeremy L Thompson break; 1780d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 1790d0321e0SJeremy L Thompson *need_sync = has_valid_data && !impl->d_data; 1800d0321e0SJeremy L Thompson break; 1810d0321e0SJeremy L Thompson } 1820d0321e0SJeremy L Thompson 1830d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1840d0321e0SJeremy L Thompson } 1850d0321e0SJeremy L Thompson 1860d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1870d0321e0SJeremy L Thompson // Set data from host 1880d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1890d0321e0SJeremy L Thompson static int CeedQFunctionContextSetDataHost_Cuda(const CeedQFunctionContext ctx, 19043c928f4SJeremy L Thompson const CeedCopyMode copy_mode, void *data) { 1910d0321e0SJeremy L Thompson int ierr; 1920d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 1930d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 1940d0321e0SJeremy L Thompson 1950d0321e0SJeremy L Thompson ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr); 19643c928f4SJeremy L Thompson switch (copy_mode) { 1970d0321e0SJeremy L Thompson case CEED_COPY_VALUES: { 1980d0321e0SJeremy L Thompson ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); CeedChkBackend(ierr); 1990d0321e0SJeremy L Thompson impl->h_data_borrowed = NULL; 2000d0321e0SJeremy L Thompson impl->h_data = impl->h_data_owned; 2010d0321e0SJeremy L Thompson memcpy(impl->h_data, data, bytes(ctx)); 2020d0321e0SJeremy L Thompson } break; 2030d0321e0SJeremy L Thompson case CEED_OWN_POINTER: 2040d0321e0SJeremy L Thompson impl->h_data_owned = data; 2050d0321e0SJeremy L Thompson impl->h_data_borrowed = NULL; 2060d0321e0SJeremy L Thompson impl->h_data = data; 2070d0321e0SJeremy L Thompson break; 2080d0321e0SJeremy L Thompson case CEED_USE_POINTER: 2090d0321e0SJeremy L Thompson impl->h_data_borrowed = data; 2100d0321e0SJeremy L Thompson impl->h_data = data; 2110d0321e0SJeremy L Thompson break; 2120d0321e0SJeremy L Thompson } 2130d0321e0SJeremy L Thompson 2140d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2150d0321e0SJeremy L Thompson } 2160d0321e0SJeremy L Thompson 2170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2180d0321e0SJeremy L Thompson // Set data from device 2190d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2200d0321e0SJeremy L Thompson static int CeedQFunctionContextSetDataDevice_Cuda( 22143c928f4SJeremy L Thompson const CeedQFunctionContext ctx, const CeedCopyMode copy_mode, void *data) { 2220d0321e0SJeremy L Thompson int ierr; 2230d0321e0SJeremy L Thompson Ceed ceed; 2240d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 2250d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 2260d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 2270d0321e0SJeremy L Thompson 2280d0321e0SJeremy L Thompson ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr); 2290d0321e0SJeremy L Thompson impl->d_data_owned = NULL; 23043c928f4SJeremy L Thompson switch (copy_mode) { 2310d0321e0SJeremy L Thompson case CEED_COPY_VALUES: 2320d0321e0SJeremy L Thompson ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx)); 2330d0321e0SJeremy L Thompson CeedChk_Cu(ceed, ierr); 2340d0321e0SJeremy L Thompson impl->d_data_borrowed = NULL; 2350d0321e0SJeremy L Thompson impl->d_data = impl->d_data_owned; 2360d0321e0SJeremy L Thompson ierr = cudaMemcpy(impl->d_data, data, bytes(ctx), 2370d0321e0SJeremy L Thompson cudaMemcpyDeviceToDevice); CeedChk_Cu(ceed, ierr); 2380d0321e0SJeremy L Thompson break; 2390d0321e0SJeremy L Thompson case CEED_OWN_POINTER: 2400d0321e0SJeremy L Thompson impl->d_data_owned = data; 2410d0321e0SJeremy L Thompson impl->d_data_borrowed = NULL; 2420d0321e0SJeremy L Thompson impl->d_data = data; 2430d0321e0SJeremy L Thompson break; 2440d0321e0SJeremy L Thompson case CEED_USE_POINTER: 2450d0321e0SJeremy L Thompson impl->d_data_owned = NULL; 2460d0321e0SJeremy L Thompson impl->d_data_borrowed = data; 2470d0321e0SJeremy L Thompson impl->d_data = data; 2480d0321e0SJeremy L Thompson break; 2490d0321e0SJeremy L Thompson } 2500d0321e0SJeremy L Thompson 2510d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2520d0321e0SJeremy L Thompson } 2530d0321e0SJeremy L Thompson 2540d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2550d0321e0SJeremy L Thompson // Set the data used by a user context, 2560d0321e0SJeremy L Thompson // freeing any previously allocated data if applicable 2570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2580d0321e0SJeremy L Thompson static int CeedQFunctionContextSetData_Cuda(const CeedQFunctionContext ctx, 25943c928f4SJeremy L Thompson const CeedMemType mem_type, const CeedCopyMode copy_mode, void *data) { 2600d0321e0SJeremy L Thompson int ierr; 2610d0321e0SJeremy L Thompson Ceed ceed; 2620d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 2630d0321e0SJeremy L Thompson 2640d0321e0SJeremy L Thompson ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr); 26543c928f4SJeremy L Thompson switch (mem_type) { 2660d0321e0SJeremy L Thompson case CEED_MEM_HOST: 26743c928f4SJeremy L Thompson return CeedQFunctionContextSetDataHost_Cuda(ctx, copy_mode, data); 2680d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 26943c928f4SJeremy L Thompson return CeedQFunctionContextSetDataDevice_Cuda(ctx, copy_mode, data); 2700d0321e0SJeremy L Thompson } 2710d0321e0SJeremy L Thompson 2720d0321e0SJeremy L Thompson return CEED_ERROR_UNSUPPORTED; 2730d0321e0SJeremy L Thompson } 2740d0321e0SJeremy L Thompson 2750d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2760d0321e0SJeremy L Thompson // Take data 2770d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2780d0321e0SJeremy L Thompson static int CeedQFunctionContextTakeData_Cuda(const CeedQFunctionContext ctx, 27943c928f4SJeremy L Thompson const CeedMemType mem_type, void *data) { 2800d0321e0SJeremy L Thompson int ierr; 2810d0321e0SJeremy L Thompson Ceed ceed; 2820d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 2830d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 2840d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 2850d0321e0SJeremy L Thompson 28643c928f4SJeremy L Thompson // Sync data to requested mem_type 2870d0321e0SJeremy L Thompson bool need_sync = false; 28843c928f4SJeremy L Thompson ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mem_type, &need_sync); 2890d0321e0SJeremy L Thompson CeedChkBackend(ierr); 2900d0321e0SJeremy L Thompson if (need_sync) { 29143c928f4SJeremy L Thompson ierr = CeedQFunctionContextSync_Cuda(ctx, mem_type); CeedChkBackend(ierr); 2920d0321e0SJeremy L Thompson } 2930d0321e0SJeremy L Thompson 2940d0321e0SJeremy L Thompson // Update pointer 29543c928f4SJeremy L Thompson switch (mem_type) { 2960d0321e0SJeremy L Thompson case CEED_MEM_HOST: 2970d0321e0SJeremy L Thompson *(void **)data = impl->h_data_borrowed; 2980d0321e0SJeremy L Thompson impl->h_data_borrowed = NULL; 2990d0321e0SJeremy L Thompson impl->h_data = NULL; 3000d0321e0SJeremy L Thompson break; 3010d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 3020d0321e0SJeremy L Thompson *(void **)data = impl->d_data_borrowed; 3030d0321e0SJeremy L Thompson impl->d_data_borrowed = NULL; 3040d0321e0SJeremy L Thompson impl->d_data = NULL; 3050d0321e0SJeremy L Thompson break; 3060d0321e0SJeremy L Thompson } 3070d0321e0SJeremy L Thompson 3080d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3090d0321e0SJeremy L Thompson } 3100d0321e0SJeremy L Thompson 3110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 312*28bfd0b7SJeremy L Thompson // Core logic for GetData. 313*28bfd0b7SJeremy L Thompson // If a different memory type is most up to date, this will perform a copy 3140d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 315*28bfd0b7SJeremy L Thompson static int CeedQFunctionContextGetDataCore_Cuda(const CeedQFunctionContext ctx, 31643c928f4SJeremy L Thompson const CeedMemType mem_type, void *data) { 3170d0321e0SJeremy L Thompson int ierr; 3180d0321e0SJeremy L Thompson Ceed ceed; 3190d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 3200d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 3210d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 3220d0321e0SJeremy L Thompson 32343c928f4SJeremy L Thompson // Sync data to requested mem_type 3240d0321e0SJeremy L Thompson bool need_sync = false; 32543c928f4SJeremy L Thompson ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mem_type, &need_sync); 3260d0321e0SJeremy L Thompson CeedChkBackend(ierr); 3270d0321e0SJeremy L Thompson if (need_sync) { 32843c928f4SJeremy L Thompson ierr = CeedQFunctionContextSync_Cuda(ctx, mem_type); CeedChkBackend(ierr); 3290d0321e0SJeremy L Thompson } 3300d0321e0SJeremy L Thompson 3310d0321e0SJeremy L Thompson // Update pointer 33243c928f4SJeremy L Thompson switch (mem_type) { 3330d0321e0SJeremy L Thompson case CEED_MEM_HOST: 3340d0321e0SJeremy L Thompson *(void **)data = impl->h_data; 3350d0321e0SJeremy L Thompson break; 3360d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 3370d0321e0SJeremy L Thompson *(void **)data = impl->d_data; 3380d0321e0SJeremy L Thompson break; 3390d0321e0SJeremy L Thompson } 3400d0321e0SJeremy L Thompson 341*28bfd0b7SJeremy L Thompson return CEED_ERROR_SUCCESS; 342*28bfd0b7SJeremy L Thompson } 343*28bfd0b7SJeremy L Thompson 344*28bfd0b7SJeremy L Thompson //------------------------------------------------------------------------------ 345*28bfd0b7SJeremy L Thompson // Get read-only access to the data 346*28bfd0b7SJeremy L Thompson //------------------------------------------------------------------------------ 347*28bfd0b7SJeremy L Thompson static int CeedQFunctionContextGetDataRead_Cuda(const CeedQFunctionContext ctx, 348*28bfd0b7SJeremy L Thompson const CeedMemType mem_type, void *data) { 349*28bfd0b7SJeremy L Thompson return CeedQFunctionContextGetDataCore_Cuda(ctx, mem_type, data); 350*28bfd0b7SJeremy L Thompson } 351*28bfd0b7SJeremy L Thompson 352*28bfd0b7SJeremy L Thompson //------------------------------------------------------------------------------ 353*28bfd0b7SJeremy L Thompson // Get read/write access to the data 354*28bfd0b7SJeremy L Thompson //------------------------------------------------------------------------------ 355*28bfd0b7SJeremy L Thompson static int CeedQFunctionContextGetData_Cuda(const CeedQFunctionContext ctx, 356*28bfd0b7SJeremy L Thompson const CeedMemType mem_type, void *data) { 357*28bfd0b7SJeremy L Thompson int ierr; 358*28bfd0b7SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 359*28bfd0b7SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 360*28bfd0b7SJeremy L Thompson 361*28bfd0b7SJeremy L Thompson ierr = CeedQFunctionContextGetDataCore_Cuda(ctx, mem_type, data); 362*28bfd0b7SJeremy L Thompson CeedChkBackend(ierr); 363*28bfd0b7SJeremy L Thompson 3640d0321e0SJeremy L Thompson // Mark only pointer for requested memory as valid 3650d0321e0SJeremy L Thompson ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr); 36643c928f4SJeremy L Thompson switch (mem_type) { 3670d0321e0SJeremy L Thompson case CEED_MEM_HOST: 3680d0321e0SJeremy L Thompson impl->h_data = *(void **)data; 3690d0321e0SJeremy L Thompson break; 3700d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 3710d0321e0SJeremy L Thompson impl->d_data = *(void **)data; 3720d0321e0SJeremy L Thompson break; 3730d0321e0SJeremy L Thompson } 3740d0321e0SJeremy L Thompson 3750d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3760d0321e0SJeremy L Thompson } 3770d0321e0SJeremy L Thompson 3780d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3790d0321e0SJeremy L Thompson // Destroy the user context 3800d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3810d0321e0SJeremy L Thompson static int CeedQFunctionContextDestroy_Cuda(const CeedQFunctionContext ctx) { 3820d0321e0SJeremy L Thompson int ierr; 3830d0321e0SJeremy L Thompson Ceed ceed; 3840d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 3850d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 3860d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 3870d0321e0SJeremy L Thompson 3880d0321e0SJeremy L Thompson ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr); 3890d0321e0SJeremy L Thompson ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr); 3900d0321e0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 3910d0321e0SJeremy L Thompson 3920d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3930d0321e0SJeremy L Thompson } 3940d0321e0SJeremy L Thompson 3950d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3960d0321e0SJeremy L Thompson // QFunctionContext Create 3970d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3980d0321e0SJeremy L Thompson int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx) { 3990d0321e0SJeremy L Thompson int ierr; 4000d0321e0SJeremy L Thompson CeedQFunctionContext_Cuda *impl; 4010d0321e0SJeremy L Thompson Ceed ceed; 4020d0321e0SJeremy L Thompson ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 4030d0321e0SJeremy L Thompson 4040d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "HasValidData", 4050d0321e0SJeremy L Thompson CeedQFunctionContextHasValidData_Cuda); 4060d0321e0SJeremy L Thompson CeedChkBackend(ierr); 4070d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, 4080d0321e0SJeremy L Thompson "HasBorrowedDataOfType", 4090d0321e0SJeremy L Thompson CeedQFunctionContextHasBorrowedDataOfType_Cuda); 4100d0321e0SJeremy L Thompson CeedChkBackend(ierr); 4110d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData", 4120d0321e0SJeremy L Thompson CeedQFunctionContextSetData_Cuda); CeedChkBackend(ierr); 4130d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData", 4140d0321e0SJeremy L Thompson CeedQFunctionContextTakeData_Cuda); CeedChkBackend(ierr); 4150d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetData", 4160d0321e0SJeremy L Thompson CeedQFunctionContextGetData_Cuda); CeedChkBackend(ierr); 417*28bfd0b7SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetDataRead", 418*28bfd0b7SJeremy L Thompson CeedQFunctionContextGetDataRead_Cuda); CeedChkBackend(ierr); 4190d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy", 4200d0321e0SJeremy L Thompson CeedQFunctionContextDestroy_Cuda); CeedChkBackend(ierr); 4210d0321e0SJeremy L Thompson 4220d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 4230d0321e0SJeremy L Thompson ierr = CeedQFunctionContextSetBackendData(ctx, impl); CeedChkBackend(ierr); 4240d0321e0SJeremy L Thompson 4250d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4260d0321e0SJeremy L Thompson } 4270d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 428