1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <cuda_runtime.h> 20 #include <string.h> 21 #include "ceed-cuda-ref.h" 22 23 //------------------------------------------------------------------------------ 24 // * Bytes used 25 //------------------------------------------------------------------------------ 26 static inline size_t bytes(const CeedQFunctionContext ctx) { 27 int ierr; 28 size_t ctxsize; 29 ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr); 30 return ctxsize; 31 } 32 33 //------------------------------------------------------------------------------ 34 // Sync host to device 35 //------------------------------------------------------------------------------ 36 static inline int CeedQFunctionContextSyncH2D_Cuda( 37 const CeedQFunctionContext ctx) { 38 int ierr; 39 Ceed ceed; 40 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 41 CeedQFunctionContext_Cuda *impl; 42 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 43 44 if (!impl->h_data) 45 // LCOV_EXCL_START 46 return CeedError(ceed, CEED_ERROR_BACKEND, 47 "No valid host data to sync to device"); 48 // LCOV_EXCL_STOP 49 50 if (impl->d_data_borrowed) { 51 impl->d_data = impl->d_data_borrowed; 52 } else if (impl->d_data_owned) { 53 impl->d_data = impl->d_data_owned; 54 } else { 55 ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx)); 56 CeedChk_Cu(ceed, ierr); 57 impl->d_data = impl->d_data_owned; 58 } 59 60 ierr = cudaMemcpy(impl->d_data, impl->h_data, bytes(ctx), 61 cudaMemcpyHostToDevice); CeedChk_Cu(ceed, ierr); 62 63 return CEED_ERROR_SUCCESS; 64 } 65 66 //------------------------------------------------------------------------------ 67 // Sync device to host 68 //------------------------------------------------------------------------------ 69 static inline int CeedQFunctionContextSyncD2H_Cuda( 70 const CeedQFunctionContext ctx) { 71 int ierr; 72 Ceed ceed; 73 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 74 CeedQFunctionContext_Cuda *impl; 75 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 76 77 if (!impl->d_data) 78 // LCOV_EXCL_START 79 return CeedError(ceed, CEED_ERROR_BACKEND, 80 "No valid device data to sync to host"); 81 // LCOV_EXCL_STOP 82 83 if (impl->h_data_borrowed) { 84 impl->h_data = impl->h_data_borrowed; 85 } else if (impl->h_data_owned) { 86 impl->h_data = impl->h_data_owned; 87 } else { 88 ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); 89 CeedChkBackend(ierr); 90 impl->h_data = impl->h_data_owned; 91 } 92 93 ierr = cudaMemcpy(impl->h_data, impl->d_data, bytes(ctx), 94 cudaMemcpyDeviceToHost); CeedChk_Cu(ceed, ierr); 95 96 return CEED_ERROR_SUCCESS; 97 } 98 99 //------------------------------------------------------------------------------ 100 // Sync data of type 101 //------------------------------------------------------------------------------ 102 static inline int CeedQFunctionContextSync_Cuda( 103 const CeedQFunctionContext ctx, CeedMemType mem_type) { 104 switch (mem_type) { 105 case CEED_MEM_HOST: return CeedQFunctionContextSyncD2H_Cuda(ctx); 106 case CEED_MEM_DEVICE: return CeedQFunctionContextSyncH2D_Cuda(ctx); 107 } 108 return CEED_ERROR_UNSUPPORTED; 109 } 110 111 //------------------------------------------------------------------------------ 112 // Set all pointers as invalid 113 //------------------------------------------------------------------------------ 114 static inline int CeedQFunctionContextSetAllInvalid_Cuda( 115 const CeedQFunctionContext ctx) { 116 int ierr; 117 CeedQFunctionContext_Cuda *impl; 118 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 119 120 impl->h_data = NULL; 121 impl->d_data = NULL; 122 123 return CEED_ERROR_SUCCESS; 124 } 125 126 //------------------------------------------------------------------------------ 127 // Check if ctx has valid data 128 //------------------------------------------------------------------------------ 129 static inline int CeedQFunctionContextHasValidData_Cuda( 130 const CeedQFunctionContext ctx, bool *has_valid_data) { 131 int ierr; 132 CeedQFunctionContext_Cuda *impl; 133 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 134 135 *has_valid_data = !!impl->h_data || !!impl->d_data; 136 137 return CEED_ERROR_SUCCESS; 138 } 139 140 //------------------------------------------------------------------------------ 141 // Check if ctx has borrowed data 142 //------------------------------------------------------------------------------ 143 static inline int CeedQFunctionContextHasBorrowedDataOfType_Cuda( 144 const CeedQFunctionContext ctx, CeedMemType mem_type, 145 bool *has_borrowed_data_of_type) { 146 int ierr; 147 CeedQFunctionContext_Cuda *impl; 148 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 149 150 switch (mem_type) { 151 case CEED_MEM_HOST: 152 *has_borrowed_data_of_type = !!impl->h_data_borrowed; 153 break; 154 case CEED_MEM_DEVICE: 155 *has_borrowed_data_of_type = !!impl->d_data_borrowed; 156 break; 157 } 158 159 return CEED_ERROR_SUCCESS; 160 } 161 162 //------------------------------------------------------------------------------ 163 // Check if data of given type needs sync 164 //------------------------------------------------------------------------------ 165 static inline int CeedQFunctionContextNeedSync_Cuda( 166 const CeedQFunctionContext ctx, CeedMemType mem_type, bool *need_sync) { 167 int ierr; 168 CeedQFunctionContext_Cuda *impl; 169 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 170 171 bool has_valid_data = true; 172 ierr = CeedQFunctionContextHasValidData(ctx, &has_valid_data); 173 CeedChkBackend(ierr); 174 switch (mem_type) { 175 case CEED_MEM_HOST: 176 *need_sync = has_valid_data && !impl->h_data; 177 break; 178 case CEED_MEM_DEVICE: 179 *need_sync = has_valid_data && !impl->d_data; 180 break; 181 } 182 183 return CEED_ERROR_SUCCESS; 184 } 185 186 //------------------------------------------------------------------------------ 187 // Set data from host 188 //------------------------------------------------------------------------------ 189 static int CeedQFunctionContextSetDataHost_Cuda(const CeedQFunctionContext ctx, 190 const CeedCopyMode copy_mode, void *data) { 191 int ierr; 192 CeedQFunctionContext_Cuda *impl; 193 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 194 195 ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr); 196 switch (copy_mode) { 197 case CEED_COPY_VALUES: { 198 ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); CeedChkBackend(ierr); 199 impl->h_data_borrowed = NULL; 200 impl->h_data = impl->h_data_owned; 201 memcpy(impl->h_data, data, bytes(ctx)); 202 } break; 203 case CEED_OWN_POINTER: 204 impl->h_data_owned = data; 205 impl->h_data_borrowed = NULL; 206 impl->h_data = data; 207 break; 208 case CEED_USE_POINTER: 209 impl->h_data_borrowed = data; 210 impl->h_data = data; 211 break; 212 } 213 214 return CEED_ERROR_SUCCESS; 215 } 216 217 //------------------------------------------------------------------------------ 218 // Set data from device 219 //------------------------------------------------------------------------------ 220 static int CeedQFunctionContextSetDataDevice_Cuda( 221 const CeedQFunctionContext ctx, const CeedCopyMode copy_mode, void *data) { 222 int ierr; 223 Ceed ceed; 224 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 225 CeedQFunctionContext_Cuda *impl; 226 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 227 228 ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr); 229 impl->d_data_owned = NULL; 230 switch (copy_mode) { 231 case CEED_COPY_VALUES: 232 ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx)); 233 CeedChk_Cu(ceed, ierr); 234 impl->d_data_borrowed = NULL; 235 impl->d_data = impl->d_data_owned; 236 ierr = cudaMemcpy(impl->d_data, data, bytes(ctx), 237 cudaMemcpyDeviceToDevice); CeedChk_Cu(ceed, ierr); 238 break; 239 case CEED_OWN_POINTER: 240 impl->d_data_owned = data; 241 impl->d_data_borrowed = NULL; 242 impl->d_data = data; 243 break; 244 case CEED_USE_POINTER: 245 impl->d_data_owned = NULL; 246 impl->d_data_borrowed = data; 247 impl->d_data = data; 248 break; 249 } 250 251 return CEED_ERROR_SUCCESS; 252 } 253 254 //------------------------------------------------------------------------------ 255 // Set the data used by a user context, 256 // freeing any previously allocated data if applicable 257 //------------------------------------------------------------------------------ 258 static int CeedQFunctionContextSetData_Cuda(const CeedQFunctionContext ctx, 259 const CeedMemType mem_type, const CeedCopyMode copy_mode, void *data) { 260 int ierr; 261 Ceed ceed; 262 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 263 264 ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr); 265 switch (mem_type) { 266 case CEED_MEM_HOST: 267 return CeedQFunctionContextSetDataHost_Cuda(ctx, copy_mode, data); 268 case CEED_MEM_DEVICE: 269 return CeedQFunctionContextSetDataDevice_Cuda(ctx, copy_mode, data); 270 } 271 272 return CEED_ERROR_UNSUPPORTED; 273 } 274 275 //------------------------------------------------------------------------------ 276 // Take data 277 //------------------------------------------------------------------------------ 278 static int CeedQFunctionContextTakeData_Cuda(const CeedQFunctionContext ctx, 279 const CeedMemType mem_type, void *data) { 280 int ierr; 281 Ceed ceed; 282 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 283 CeedQFunctionContext_Cuda *impl; 284 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 285 286 // Sync data to requested mem_type 287 bool need_sync = false; 288 ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mem_type, &need_sync); 289 CeedChkBackend(ierr); 290 if (need_sync) { 291 ierr = CeedQFunctionContextSync_Cuda(ctx, mem_type); CeedChkBackend(ierr); 292 } 293 294 // Update pointer 295 switch (mem_type) { 296 case CEED_MEM_HOST: 297 *(void **)data = impl->h_data_borrowed; 298 impl->h_data_borrowed = NULL; 299 impl->h_data = NULL; 300 break; 301 case CEED_MEM_DEVICE: 302 *(void **)data = impl->d_data_borrowed; 303 impl->d_data_borrowed = NULL; 304 impl->d_data = NULL; 305 break; 306 } 307 308 return CEED_ERROR_SUCCESS; 309 } 310 311 //------------------------------------------------------------------------------ 312 // Get data 313 //------------------------------------------------------------------------------ 314 static int CeedQFunctionContextGetData_Cuda(const CeedQFunctionContext ctx, 315 const CeedMemType mem_type, void *data) { 316 int ierr; 317 Ceed ceed; 318 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 319 CeedQFunctionContext_Cuda *impl; 320 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 321 322 // Sync data to requested mem_type 323 bool need_sync = false; 324 ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mem_type, &need_sync); 325 CeedChkBackend(ierr); 326 if (need_sync) { 327 ierr = CeedQFunctionContextSync_Cuda(ctx, mem_type); CeedChkBackend(ierr); 328 } 329 330 // Update pointer 331 switch (mem_type) { 332 case CEED_MEM_HOST: 333 *(void **)data = impl->h_data; 334 break; 335 case CEED_MEM_DEVICE: 336 *(void **)data = impl->d_data; 337 break; 338 } 339 340 // Mark only pointer for requested memory as valid 341 ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr); 342 switch (mem_type) { 343 case CEED_MEM_HOST: 344 impl->h_data = *(void **)data; 345 break; 346 case CEED_MEM_DEVICE: 347 impl->d_data = *(void **)data; 348 break; 349 } 350 351 return CEED_ERROR_SUCCESS; 352 } 353 354 //------------------------------------------------------------------------------ 355 // Destroy the user context 356 //------------------------------------------------------------------------------ 357 static int CeedQFunctionContextDestroy_Cuda(const CeedQFunctionContext ctx) { 358 int ierr; 359 Ceed ceed; 360 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 361 CeedQFunctionContext_Cuda *impl; 362 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 363 364 ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr); 365 ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr); 366 ierr = CeedFree(&impl); CeedChkBackend(ierr); 367 368 return CEED_ERROR_SUCCESS; 369 } 370 371 //------------------------------------------------------------------------------ 372 // QFunctionContext Create 373 //------------------------------------------------------------------------------ 374 int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx) { 375 int ierr; 376 CeedQFunctionContext_Cuda *impl; 377 Ceed ceed; 378 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 379 380 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "HasValidData", 381 CeedQFunctionContextHasValidData_Cuda); 382 CeedChkBackend(ierr); 383 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, 384 "HasBorrowedDataOfType", 385 CeedQFunctionContextHasBorrowedDataOfType_Cuda); 386 CeedChkBackend(ierr); 387 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData", 388 CeedQFunctionContextSetData_Cuda); CeedChkBackend(ierr); 389 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData", 390 CeedQFunctionContextTakeData_Cuda); CeedChkBackend(ierr); 391 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetData", 392 CeedQFunctionContextGetData_Cuda); CeedChkBackend(ierr); 393 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy", 394 CeedQFunctionContextDestroy_Cuda); CeedChkBackend(ierr); 395 396 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 397 ierr = CeedQFunctionContextSetBackendData(ctx, impl); CeedChkBackend(ierr); 398 399 return CEED_ERROR_SUCCESS; 400 } 401 //------------------------------------------------------------------------------ 402