1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <cuda_runtime.h> 20 #include <string.h> 21 #include "ceed-cuda-ref.h" 22 23 //------------------------------------------------------------------------------ 24 // * Bytes used 25 //------------------------------------------------------------------------------ 26 static inline size_t bytes(const CeedQFunctionContext ctx) { 27 int ierr; 28 size_t ctxsize; 29 ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr); 30 return ctxsize; 31 } 32 33 //------------------------------------------------------------------------------ 34 // Sync host to device 35 //------------------------------------------------------------------------------ 36 static inline int CeedQFunctionContextSyncH2D_Cuda( 37 const CeedQFunctionContext ctx) { 38 int ierr; 39 Ceed ceed; 40 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 41 CeedQFunctionContext_Cuda *impl; 42 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 43 44 if (!impl->h_data) 45 // LCOV_EXCL_START 46 return CeedError(ceed, CEED_ERROR_BACKEND, 47 "No valid host data to sync to device"); 48 // LCOV_EXCL_STOP 49 50 if (impl->d_data_borrowed) { 51 impl->d_data = impl->d_data_borrowed; 52 } else if (impl->d_data_owned) { 53 impl->d_data = impl->d_data_owned; 54 } else { 55 ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx)); 56 CeedChk_Cu(ceed, ierr); 57 impl->d_data = impl->d_data_owned; 58 } 59 60 ierr = cudaMemcpy(impl->d_data, impl->h_data, bytes(ctx), 61 cudaMemcpyHostToDevice); CeedChk_Cu(ceed, ierr); 62 63 return CEED_ERROR_SUCCESS; 64 } 65 66 //------------------------------------------------------------------------------ 67 // Sync device to host 68 //------------------------------------------------------------------------------ 69 static inline int CeedQFunctionContextSyncD2H_Cuda( 70 const CeedQFunctionContext ctx) { 71 int ierr; 72 Ceed ceed; 73 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 74 CeedQFunctionContext_Cuda *impl; 75 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 76 77 if (!impl->d_data) 78 // LCOV_EXCL_START 79 return CeedError(ceed, CEED_ERROR_BACKEND, 80 "No valid device data to sync to host"); 81 // LCOV_EXCL_STOP 82 83 if (impl->h_data_borrowed) { 84 impl->h_data = impl->h_data_borrowed; 85 } else if (impl->h_data_owned) { 86 impl->h_data = impl->h_data_owned; 87 } else { 88 ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); 89 CeedChkBackend(ierr); 90 impl->h_data = impl->h_data_owned; 91 } 92 93 ierr = cudaMemcpy(impl->h_data, impl->d_data, bytes(ctx), 94 cudaMemcpyDeviceToHost); CeedChk_Cu(ceed, ierr); 95 96 return CEED_ERROR_SUCCESS; 97 } 98 99 //------------------------------------------------------------------------------ 100 // Sync data of type 101 //------------------------------------------------------------------------------ 102 static inline int CeedQFunctionContextSync_Cuda( 103 const CeedQFunctionContext ctx, CeedMemType mtype) { 104 switch (mtype) { 105 case CEED_MEM_HOST: return CeedQFunctionContextSyncD2H_Cuda(ctx); 106 case CEED_MEM_DEVICE: return CeedQFunctionContextSyncH2D_Cuda(ctx); 107 } 108 return CEED_ERROR_UNSUPPORTED; 109 } 110 111 //------------------------------------------------------------------------------ 112 // Set all pointers as invalid 113 //------------------------------------------------------------------------------ 114 static inline int CeedQFunctionContextSetAllInvalid_Cuda( 115 const CeedQFunctionContext ctx) { 116 int ierr; 117 CeedQFunctionContext_Cuda *impl; 118 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 119 120 impl->h_data = NULL; 121 impl->d_data = NULL; 122 123 return CEED_ERROR_SUCCESS; 124 } 125 126 //------------------------------------------------------------------------------ 127 // Check if ctx has valid data 128 //------------------------------------------------------------------------------ 129 static inline int CeedQFunctionContextHasValidData_Cuda( 130 const CeedQFunctionContext ctx, bool *has_valid_data) { 131 int ierr; 132 CeedQFunctionContext_Cuda *impl; 133 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 134 135 *has_valid_data = !!impl->h_data || !!impl->d_data; 136 137 return CEED_ERROR_SUCCESS; 138 } 139 140 //------------------------------------------------------------------------------ 141 // Check if ctx has borrowed data 142 //------------------------------------------------------------------------------ 143 static inline int CeedQFunctionContextHasBorrowedDataOfType_Cuda( 144 const CeedQFunctionContext ctx, CeedMemType mtype, 145 bool *has_borrowed_data_of_type) { 146 int ierr; 147 CeedQFunctionContext_Cuda *impl; 148 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 149 150 switch (mtype) { 151 case CEED_MEM_HOST: 152 *has_borrowed_data_of_type = !!impl->h_data_borrowed; 153 break; 154 case CEED_MEM_DEVICE: 155 *has_borrowed_data_of_type = !!impl->d_data_borrowed; 156 break; 157 } 158 159 return CEED_ERROR_SUCCESS; 160 } 161 162 //------------------------------------------------------------------------------ 163 // Check if data of given type needs sync 164 //------------------------------------------------------------------------------ 165 static inline int CeedQFunctionContextNeedSync_Cuda( 166 const CeedQFunctionContext ctx, CeedMemType mtype, bool *need_sync) { 167 int ierr; 168 CeedQFunctionContext_Cuda *impl; 169 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 170 171 bool has_valid_data = true; 172 ierr = CeedQFunctionContextHasValidData(ctx, &has_valid_data); 173 CeedChkBackend(ierr); 174 switch (mtype) { 175 case CEED_MEM_HOST: 176 *need_sync = has_valid_data && !impl->h_data; 177 break; 178 case CEED_MEM_DEVICE: 179 *need_sync = has_valid_data && !impl->d_data; 180 break; 181 } 182 183 return CEED_ERROR_SUCCESS; 184 } 185 186 //------------------------------------------------------------------------------ 187 // Set data from host 188 //------------------------------------------------------------------------------ 189 static int CeedQFunctionContextSetDataHost_Cuda(const CeedQFunctionContext ctx, 190 const CeedCopyMode cmode, void *data) { 191 int ierr; 192 CeedQFunctionContext_Cuda *impl; 193 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 194 195 ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr); 196 switch (cmode) { 197 case CEED_COPY_VALUES: { 198 ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); CeedChkBackend(ierr); 199 impl->h_data_borrowed = NULL; 200 impl->h_data = impl->h_data_owned; 201 memcpy(impl->h_data, data, bytes(ctx)); 202 } break; 203 case CEED_OWN_POINTER: 204 impl->h_data_owned = data; 205 impl->h_data_borrowed = NULL; 206 impl->h_data = data; 207 break; 208 case CEED_USE_POINTER: 209 impl->h_data_borrowed = data; 210 impl->h_data = data; 211 break; 212 } 213 214 return CEED_ERROR_SUCCESS; 215 } 216 217 //------------------------------------------------------------------------------ 218 // Set data from device 219 //------------------------------------------------------------------------------ 220 static int CeedQFunctionContextSetDataDevice_Cuda( 221 const CeedQFunctionContext ctx, const CeedCopyMode cmode, void *data) { 222 int ierr; 223 Ceed ceed; 224 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 225 CeedQFunctionContext_Cuda *impl; 226 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 227 228 ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr); 229 impl->d_data_owned = NULL; 230 switch (cmode) { 231 case CEED_COPY_VALUES: 232 ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx)); 233 CeedChk_Cu(ceed, ierr); 234 impl->d_data_borrowed = NULL; 235 impl->d_data = impl->d_data_owned; 236 ierr = cudaMemcpy(impl->d_data, data, bytes(ctx), 237 cudaMemcpyDeviceToDevice); CeedChk_Cu(ceed, ierr); 238 break; 239 case CEED_OWN_POINTER: 240 impl->d_data_owned = data; 241 impl->d_data_borrowed = NULL; 242 impl->d_data = data; 243 break; 244 case CEED_USE_POINTER: 245 impl->d_data_owned = NULL; 246 impl->d_data_borrowed = data; 247 impl->d_data = data; 248 break; 249 } 250 251 return CEED_ERROR_SUCCESS; 252 } 253 254 //------------------------------------------------------------------------------ 255 // Set the data used by a user context, 256 // freeing any previously allocated data if applicable 257 //------------------------------------------------------------------------------ 258 static int CeedQFunctionContextSetData_Cuda(const CeedQFunctionContext ctx, 259 const CeedMemType mtype, const CeedCopyMode cmode, void *data) { 260 int ierr; 261 Ceed ceed; 262 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 263 264 ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr); 265 switch (mtype) { 266 case CEED_MEM_HOST: 267 return CeedQFunctionContextSetDataHost_Cuda(ctx, cmode, data); 268 case CEED_MEM_DEVICE: 269 return CeedQFunctionContextSetDataDevice_Cuda(ctx, cmode, data); 270 } 271 272 return CEED_ERROR_UNSUPPORTED; 273 } 274 275 //------------------------------------------------------------------------------ 276 // Take data 277 //------------------------------------------------------------------------------ 278 static int CeedQFunctionContextTakeData_Cuda(const CeedQFunctionContext ctx, 279 const CeedMemType mtype, void *data) { 280 int ierr; 281 Ceed ceed; 282 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 283 CeedQFunctionContext_Cuda *impl; 284 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 285 286 // Sync data to requested memtype 287 bool need_sync = false; 288 ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mtype, &need_sync); 289 CeedChkBackend(ierr); 290 if (need_sync) { 291 ierr = CeedQFunctionContextSync_Cuda(ctx, mtype); CeedChkBackend(ierr); 292 } 293 294 // Update pointer 295 switch (mtype) { 296 case CEED_MEM_HOST: 297 *(void **)data = impl->h_data_borrowed; 298 impl->h_data_borrowed = NULL; 299 impl->h_data = NULL; 300 break; 301 case CEED_MEM_DEVICE: 302 *(void **)data = impl->d_data_borrowed; 303 impl->d_data_borrowed = NULL; 304 impl->d_data = NULL; 305 break; 306 } 307 308 return CEED_ERROR_SUCCESS; 309 } 310 311 //------------------------------------------------------------------------------ 312 // Get data 313 //------------------------------------------------------------------------------ 314 static int CeedQFunctionContextGetData_Cuda(const CeedQFunctionContext ctx, 315 const CeedMemType mtype, void *data) { 316 int ierr; 317 Ceed ceed; 318 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 319 CeedQFunctionContext_Cuda *impl; 320 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 321 322 // Sync data to requested memtype 323 bool need_sync = false; 324 ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mtype, &need_sync); 325 CeedChkBackend(ierr); 326 if (need_sync) { 327 ierr = CeedQFunctionContextSync_Cuda(ctx, mtype); CeedChkBackend(ierr); 328 } 329 330 // Update pointer 331 switch (mtype) { 332 case CEED_MEM_HOST: 333 *(void **)data = impl->h_data; 334 break; 335 case CEED_MEM_DEVICE: 336 *(void **)data = impl->d_data; 337 break; 338 } 339 340 // Mark only pointer for requested memory as valid 341 ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr); 342 switch (mtype) { 343 case CEED_MEM_HOST: 344 impl->h_data = *(void **)data; 345 break; 346 case CEED_MEM_DEVICE: 347 impl->d_data = *(void **)data; 348 break; 349 } 350 351 return CEED_ERROR_SUCCESS; 352 } 353 354 //------------------------------------------------------------------------------ 355 // Restore data obtained using CeedQFunctionContextGetData() 356 //------------------------------------------------------------------------------ 357 static int CeedQFunctionContextRestoreData_Cuda( 358 const CeedQFunctionContext ctx) { 359 return CEED_ERROR_SUCCESS; 360 } 361 362 //------------------------------------------------------------------------------ 363 // Destroy the user context 364 //------------------------------------------------------------------------------ 365 static int CeedQFunctionContextDestroy_Cuda(const CeedQFunctionContext ctx) { 366 int ierr; 367 Ceed ceed; 368 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 369 CeedQFunctionContext_Cuda *impl; 370 ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); 371 372 ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr); 373 ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr); 374 ierr = CeedFree(&impl); CeedChkBackend(ierr); 375 376 return CEED_ERROR_SUCCESS; 377 } 378 379 //------------------------------------------------------------------------------ 380 // QFunctionContext Create 381 //------------------------------------------------------------------------------ 382 int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx) { 383 int ierr; 384 CeedQFunctionContext_Cuda *impl; 385 Ceed ceed; 386 ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); 387 388 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "HasValidData", 389 CeedQFunctionContextHasValidData_Cuda); 390 CeedChkBackend(ierr); 391 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, 392 "HasBorrowedDataOfType", 393 CeedQFunctionContextHasBorrowedDataOfType_Cuda); 394 CeedChkBackend(ierr); 395 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData", 396 CeedQFunctionContextSetData_Cuda); CeedChkBackend(ierr); 397 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData", 398 CeedQFunctionContextTakeData_Cuda); CeedChkBackend(ierr); 399 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetData", 400 CeedQFunctionContextGetData_Cuda); CeedChkBackend(ierr); 401 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "RestoreData", 402 CeedQFunctionContextRestoreData_Cuda); CeedChkBackend(ierr); 403 ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy", 404 CeedQFunctionContextDestroy_Cuda); CeedChkBackend(ierr); 405 406 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 407 ierr = CeedQFunctionContextSetBackendData(ctx, impl); CeedChkBackend(ierr); 408 409 return CEED_ERROR_SUCCESS; 410 } 411 //------------------------------------------------------------------------------ 412