1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/backend.h> 9 #include <ceed/ceed.h> 10 11 #include <cmath> 12 #include <string> 13 #include <sycl/sycl.hpp> 14 15 #include "ceed-sycl-ref.hpp" 16 17 //------------------------------------------------------------------------------ 18 // Check if host/device sync is needed 19 //------------------------------------------------------------------------------ 20 static inline int CeedVectorNeedSync_Sycl(const CeedVector vec, CeedMemType mem_type, bool *need_sync) { 21 bool has_valid_array = false; 22 CeedVector_Sycl *impl; 23 24 CeedCallBackend(CeedVectorGetData(vec, &impl)); 25 CeedCallBackend(CeedVectorHasValidArray(vec, &has_valid_array)); 26 switch (mem_type) { 27 case CEED_MEM_HOST: 28 *need_sync = has_valid_array && !impl->h_array; 29 break; 30 case CEED_MEM_DEVICE: 31 *need_sync = has_valid_array && !impl->d_array; 32 break; 33 } 34 return CEED_ERROR_SUCCESS; 35 } 36 37 //------------------------------------------------------------------------------ 38 // Sync host to device 39 //------------------------------------------------------------------------------ 40 static inline int CeedVectorSyncH2D_Sycl(const CeedVector vec) { 41 Ceed ceed; 42 Ceed_Sycl *data; 43 CeedSize length; 44 CeedVector_Sycl *impl; 45 46 CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 47 CeedCallBackend(CeedGetData(ceed, &data)); 48 CeedCallBackend(CeedVectorGetData(vec, &impl)); 49 50 CeedCheck(impl->h_array, ceed, CEED_ERROR_BACKEND, "No valid host data to sync to device"); 51 52 CeedCallBackend(CeedVectorGetLength(vec, &length)); 53 if (impl->d_array_borrowed) { 54 impl->d_array = impl->d_array_borrowed; 55 } else if (impl->d_array_owned) { 56 impl->d_array = impl->d_array_owned; 57 } else { 58 CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context)); 59 impl->d_array = impl->d_array_owned; 60 } 61 62 // Copy from host to device 63 std::vector<sycl::event> e; 64 65 if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()}; 66 CeedCallSycl(ceed, data->sycl_queue.copy<CeedScalar>(impl->h_array, impl->d_array, length, e).wait_and_throw()); 67 CeedCallBackend(CeedDestroy(&ceed)); 68 return CEED_ERROR_SUCCESS; 69 } 70 71 //------------------------------------------------------------------------------ 72 // Sync device to host 73 //------------------------------------------------------------------------------ 74 static inline int CeedVectorSyncD2H_Sycl(const CeedVector vec) { 75 Ceed ceed; 76 Ceed_Sycl *data; 77 CeedSize length; 78 CeedVector_Sycl *impl; 79 80 CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 81 CeedCallBackend(CeedGetData(ceed, &data)); 82 CeedCallBackend(CeedVectorGetData(vec, &impl)); 83 84 CeedCheck(impl->d_array, ceed, CEED_ERROR_BACKEND, "No valid device data to sync to host"); 85 86 CeedCallBackend(CeedVectorGetLength(vec, &length)); 87 if (impl->h_array_borrowed) { 88 impl->h_array = impl->h_array_borrowed; 89 } else if (impl->h_array_owned) { 90 impl->h_array = impl->h_array_owned; 91 } else { 92 CeedCallBackend(CeedCalloc(length, &impl->h_array_owned)); 93 impl->h_array = impl->h_array_owned; 94 } 95 96 // Copy from device to host 97 std::vector<sycl::event> e; 98 99 if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()}; 100 CeedCallSycl(ceed, data->sycl_queue.copy<CeedScalar>(impl->d_array, impl->h_array, length, e).wait_and_throw()); 101 CeedCallBackend(CeedDestroy(&ceed)); 102 return CEED_ERROR_SUCCESS; 103 } 104 105 //------------------------------------------------------------------------------ 106 // Sync arrays 107 //------------------------------------------------------------------------------ 108 static int CeedVectorSyncArray_Sycl(const CeedVector vec, CeedMemType mem_type) { 109 bool need_sync = false; 110 111 // Check whether device/host sync is needed 112 CeedCallBackend(CeedVectorNeedSync_Sycl(vec, mem_type, &need_sync)); 113 if (!need_sync) return CEED_ERROR_SUCCESS; 114 115 switch (mem_type) { 116 case CEED_MEM_HOST: 117 return CeedVectorSyncD2H_Sycl(vec); 118 case CEED_MEM_DEVICE: 119 return CeedVectorSyncH2D_Sycl(vec); 120 } 121 return CEED_ERROR_UNSUPPORTED; 122 } 123 124 //------------------------------------------------------------------------------ 125 // Set all pointers as invalid 126 //------------------------------------------------------------------------------ 127 static inline int CeedVectorSetAllInvalid_Sycl(const CeedVector vec) { 128 CeedVector_Sycl *impl; 129 130 CeedCallBackend(CeedVectorGetData(vec, &impl)); 131 impl->h_array = NULL; 132 impl->d_array = NULL; 133 return CEED_ERROR_SUCCESS; 134 } 135 136 //------------------------------------------------------------------------------ 137 // Check if CeedVector has any valid pointer 138 //------------------------------------------------------------------------------ 139 static inline int CeedVectorHasValidArray_Sycl(const CeedVector vec, bool *has_valid_array) { 140 CeedVector_Sycl *impl; 141 142 CeedCallBackend(CeedVectorGetData(vec, &impl)); 143 *has_valid_array = impl->h_array || impl->d_array; 144 return CEED_ERROR_SUCCESS; 145 } 146 147 //------------------------------------------------------------------------------ 148 // Check if has array of given type 149 //------------------------------------------------------------------------------ 150 static inline int CeedVectorHasArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_array_of_type) { 151 CeedVector_Sycl *impl; 152 153 CeedCallBackend(CeedVectorGetData(vec, &impl)); 154 switch (mem_type) { 155 case CEED_MEM_HOST: 156 *has_array_of_type = impl->h_array_borrowed || impl->h_array_owned; 157 break; 158 case CEED_MEM_DEVICE: 159 *has_array_of_type = impl->d_array_borrowed || impl->d_array_owned; 160 break; 161 } 162 return CEED_ERROR_SUCCESS; 163 } 164 165 //------------------------------------------------------------------------------ 166 // Check if has borrowed array of given type 167 //------------------------------------------------------------------------------ 168 static inline int CeedVectorHasBorrowedArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_borrowed_array_of_type) { 169 CeedVector_Sycl *impl; 170 171 CeedCallBackend(CeedVectorGetData(vec, &impl)); 172 switch (mem_type) { 173 case CEED_MEM_HOST: 174 *has_borrowed_array_of_type = impl->h_array_borrowed; 175 break; 176 case CEED_MEM_DEVICE: 177 *has_borrowed_array_of_type = impl->d_array_borrowed; 178 break; 179 } 180 return CEED_ERROR_SUCCESS; 181 } 182 183 //------------------------------------------------------------------------------ 184 // Set array from host 185 //------------------------------------------------------------------------------ 186 static int CeedVectorSetArrayHost_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) { 187 CeedSize length; 188 CeedVector_Sycl *impl; 189 190 CeedCallBackend(CeedVectorGetData(vec, &impl)); 191 CeedCallBackend(CeedVectorGetLength(vec, &length)); 192 193 CeedCallBackend(CeedSetHostCeedScalarArray(array, copy_mode, length, (const CeedScalar **)&impl->h_array_owned, 194 (const CeedScalar **)&impl->h_array_borrowed, (const CeedScalar **)&impl->h_array)); 195 return CEED_ERROR_SUCCESS; 196 } 197 198 //------------------------------------------------------------------------------ 199 // Set array from device 200 //------------------------------------------------------------------------------ 201 static int CeedVectorSetArrayDevice_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) { 202 CeedSize length; 203 Ceed ceed; 204 Ceed_Sycl *data; 205 CeedVector_Sycl *impl; 206 207 CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 208 CeedCallBackend(CeedVectorGetData(vec, &impl)); 209 CeedCallBackend(CeedGetData(ceed, &data)); 210 CeedCallBackend(CeedVectorGetLength(vec, &length)); 211 212 // Order queue if needed. 213 std::vector<sycl::event> e; 214 215 if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()}; 216 217 switch (copy_mode) { 218 case CEED_COPY_VALUES: { 219 if (!impl->d_array_owned) { 220 CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context)); 221 } 222 if (array) { 223 // Wait for copy to finish and handle exceptions. 224 CeedCallSycl(ceed, data->sycl_queue.copy<CeedScalar>(array, impl->d_array_owned, length, e).wait_and_throw()); 225 } 226 impl->d_array_borrowed = NULL; 227 impl->d_array = impl->d_array_owned; 228 } break; 229 case CEED_OWN_POINTER: 230 if (impl->d_array_owned) { 231 // Wait for all work to finish before freeing memory 232 CeedCallSycl(ceed, data->sycl_queue.wait_and_throw()); 233 CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context)); 234 } 235 impl->d_array_owned = array; 236 impl->d_array_borrowed = NULL; 237 impl->d_array = impl->d_array_owned; 238 break; 239 case CEED_USE_POINTER: 240 if (impl->d_array_owned) { 241 // Wait for all work to finish before freeing memory 242 CeedCallSycl(ceed, data->sycl_queue.wait_and_throw()); 243 CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context)); 244 } 245 impl->d_array_owned = NULL; 246 impl->d_array_borrowed = array; 247 impl->d_array = impl->d_array_borrowed; 248 break; 249 } 250 CeedCallBackend(CeedDestroy(&ceed)); 251 return CEED_ERROR_SUCCESS; 252 } 253 254 //------------------------------------------------------------------------------ 255 // Set the array used by a vector, 256 // freeing any previously allocated array if applicable 257 //------------------------------------------------------------------------------ 258 static int CeedVectorSetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedCopyMode copy_mode, CeedScalar *array) { 259 CeedVector_Sycl *impl; 260 261 CeedCallBackend(CeedVectorGetData(vec, &impl)); 262 263 CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec)); 264 switch (mem_type) { 265 case CEED_MEM_HOST: 266 return CeedVectorSetArrayHost_Sycl(vec, copy_mode, array); 267 case CEED_MEM_DEVICE: 268 return CeedVectorSetArrayDevice_Sycl(vec, copy_mode, array); 269 } 270 return CEED_ERROR_UNSUPPORTED; 271 } 272 273 //------------------------------------------------------------------------------ 274 // Set host array to value 275 //------------------------------------------------------------------------------ 276 static int CeedHostSetValue_Sycl(CeedScalar *h_array, CeedSize length, CeedScalar val) { 277 for (CeedSize i = 0; i < length; i++) h_array[i] = val; 278 return CEED_ERROR_SUCCESS; 279 } 280 281 //------------------------------------------------------------------------------ 282 // Set device array to value 283 //------------------------------------------------------------------------------ 284 static int CeedDeviceSetValue_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length, CeedScalar val) { 285 std::vector<sycl::event> e; 286 287 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()}; 288 sycl_queue.fill(d_array, val, length, e); 289 return CEED_ERROR_SUCCESS; 290 } 291 292 //------------------------------------------------------------------------------ 293 // Set a vector to a value, 294 //------------------------------------------------------------------------------ 295 static int CeedVectorSetValue_Sycl(CeedVector vec, CeedScalar val) { 296 Ceed ceed; 297 Ceed_Sycl *data; 298 CeedSize length; 299 CeedVector_Sycl *impl; 300 301 CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 302 CeedCallBackend(CeedGetData(ceed, &data)); 303 CeedCallBackend(CeedDestroy(&ceed)); 304 CeedCallBackend(CeedVectorGetData(vec, &impl)); 305 CeedCallBackend(CeedVectorGetLength(vec, &length)); 306 307 // Set value for synced device/host array 308 if (!impl->d_array && !impl->h_array) { 309 if (impl->d_array_borrowed) { 310 impl->d_array = impl->d_array_borrowed; 311 } else if (impl->h_array_borrowed) { 312 impl->h_array = impl->h_array_borrowed; 313 } else if (impl->d_array_owned) { 314 impl->d_array = impl->d_array_owned; 315 } else if (impl->h_array_owned) { 316 impl->h_array = impl->h_array_owned; 317 } else { 318 CeedCallBackend(CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL)); 319 } 320 } 321 if (impl->d_array) { 322 CeedCallBackend(CeedDeviceSetValue_Sycl(data->sycl_queue, impl->d_array, length, val)); 323 impl->h_array = NULL; 324 } 325 if (impl->h_array) { 326 CeedCallBackend(CeedHostSetValue_Sycl(impl->h_array, length, val)); 327 impl->d_array = NULL; 328 } 329 return CEED_ERROR_SUCCESS; 330 } 331 332 //------------------------------------------------------------------------------ 333 // Vector Take Array 334 //------------------------------------------------------------------------------ 335 static int CeedVectorTakeArray_Sycl(CeedVector vec, CeedMemType mem_type, CeedScalar **array) { 336 Ceed ceed; 337 Ceed_Sycl *data; 338 CeedVector_Sycl *impl; 339 340 CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 341 CeedCallBackend(CeedGetData(ceed, &data)); 342 CeedCallBackend(CeedVectorGetData(vec, &impl)); 343 CeedCallBackend(CeedDestroy(&ceed)); 344 CeedCallBackend(CeedVectorGetData(vec, &impl)); 345 346 // Order queue if needed 347 if (!data->sycl_queue.is_in_order()) data->sycl_queue.ext_oneapi_submit_barrier(); 348 349 // Sync array to requested mem_type 350 CeedCallBackend(CeedVectorSyncArray(vec, mem_type)); 351 352 // Update pointer 353 switch (mem_type) { 354 case CEED_MEM_HOST: 355 (*array) = impl->h_array_borrowed; 356 impl->h_array_borrowed = NULL; 357 impl->h_array = NULL; 358 break; 359 case CEED_MEM_DEVICE: 360 (*array) = impl->d_array_borrowed; 361 impl->d_array_borrowed = NULL; 362 impl->d_array = NULL; 363 break; 364 } 365 return CEED_ERROR_SUCCESS; 366 } 367 368 //------------------------------------------------------------------------------ 369 // Core logic for array syncronization for GetArray. 370 // If a different memory type is most up to date, this will perform a copy 371 //------------------------------------------------------------------------------ 372 static int CeedVectorGetArrayCore_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) { 373 CeedVector_Sycl *impl; 374 375 CeedCallBackend(CeedVectorGetData(vec, &impl)); 376 377 // Sync array to requested mem_type 378 CeedCallBackend(CeedVectorSyncArray(vec, mem_type)); 379 380 // Update pointer 381 switch (mem_type) { 382 case CEED_MEM_HOST: 383 *array = impl->h_array; 384 break; 385 case CEED_MEM_DEVICE: 386 *array = impl->d_array; 387 break; 388 } 389 return CEED_ERROR_SUCCESS; 390 } 391 392 //------------------------------------------------------------------------------ 393 // Get read-only access to a vector via the specified mem_type 394 //------------------------------------------------------------------------------ 395 static int CeedVectorGetArrayRead_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedScalar **array) { 396 return CeedVectorGetArrayCore_Sycl(vec, mem_type, (CeedScalar **)array); 397 } 398 399 //------------------------------------------------------------------------------ 400 // Get read/write access to a vector via the specified mem_type 401 //------------------------------------------------------------------------------ 402 static int CeedVectorGetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) { 403 CeedVector_Sycl *impl; 404 405 CeedCallBackend(CeedVectorGetData(vec, &impl)); 406 CeedCallBackend(CeedVectorGetArrayCore_Sycl(vec, mem_type, array)); 407 CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec)); 408 switch (mem_type) { 409 case CEED_MEM_HOST: 410 impl->h_array = *array; 411 break; 412 case CEED_MEM_DEVICE: 413 impl->d_array = *array; 414 break; 415 } 416 return CEED_ERROR_SUCCESS; 417 } 418 419 //------------------------------------------------------------------------------ 420 // Get write access to a vector via the specified mem_type 421 //------------------------------------------------------------------------------ 422 static int CeedVectorGetArrayWrite_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) { 423 bool has_array_of_type = true; 424 CeedVector_Sycl *impl; 425 426 CeedCallBackend(CeedVectorGetData(vec, &impl)); 427 CeedCallBackend(CeedVectorHasArrayOfType_Sycl(vec, mem_type, &has_array_of_type)); 428 if (!has_array_of_type) { 429 // Allocate if array is not yet allocated 430 CeedCallBackend(CeedVectorSetArray(vec, mem_type, CEED_COPY_VALUES, NULL)); 431 } else { 432 // Select dirty array 433 switch (mem_type) { 434 case CEED_MEM_HOST: 435 if (impl->h_array_borrowed) impl->h_array = impl->h_array_borrowed; 436 else impl->h_array = impl->h_array_owned; 437 break; 438 case CEED_MEM_DEVICE: 439 if (impl->d_array_borrowed) impl->d_array = impl->d_array_borrowed; 440 else impl->d_array = impl->d_array_owned; 441 } 442 } 443 return CeedVectorGetArray_Sycl(vec, mem_type, array); 444 } 445 446 //------------------------------------------------------------------------------ 447 // Get the norm of a CeedVector 448 //------------------------------------------------------------------------------ 449 static int CeedVectorNorm_Sycl(CeedVector vec, CeedNormType type, CeedScalar *norm) { 450 Ceed ceed; 451 Ceed_Sycl *data; 452 CeedSize length; 453 const CeedScalar *d_array; 454 CeedVector_Sycl *impl; 455 456 CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 457 CeedCallBackend(CeedGetData(ceed, &data)); 458 CeedCallBackend(CeedDestroy(&ceed)); 459 CeedCallBackend(CeedVectorGetData(vec, &impl)); 460 CeedCallBackend(CeedVectorGetLength(vec, &length)); 461 462 // Compute norm 463 CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &d_array)); 464 465 std::vector<sycl::event> e; 466 467 if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()}; 468 469 switch (type) { 470 case CEED_NORM_1: { 471 // Order queue 472 auto sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}}); 473 data->sycl_queue.parallel_for(length, e, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += abs(d_array[i]); }).wait_and_throw(); 474 } break; 475 case CEED_NORM_2: { 476 // Order queue 477 auto sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}}); 478 data->sycl_queue.parallel_for(length, e, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += (d_array[i] * d_array[i]); }).wait_and_throw(); 479 } break; 480 case CEED_NORM_MAX: { 481 // Order queue 482 auto maxReduction = sycl::reduction(impl->reduction_norm, sycl::maximum<>(), {sycl::property::reduction::initialize_to_identity{}}); 483 data->sycl_queue.parallel_for(length, e, maxReduction, [=](sycl::id<1> i, auto &max) { max.combine(abs(d_array[i])); }).wait_and_throw(); 484 } break; 485 } 486 // L2 norm - square root over reduced value 487 if (type == CEED_NORM_2) *norm = sqrt(*impl->reduction_norm); 488 else *norm = *impl->reduction_norm; 489 CeedCallBackend(CeedVectorRestoreArrayRead(vec, &d_array)); 490 return CEED_ERROR_SUCCESS; 491 } 492 493 //------------------------------------------------------------------------------ 494 // Take reciprocal of a vector on host 495 //------------------------------------------------------------------------------ 496 static int CeedHostReciprocal_Sycl(CeedScalar *h_array, CeedSize length) { 497 for (CeedSize i = 0; i < length; i++) { 498 if (std::fabs(h_array[i]) > CEED_EPSILON) h_array[i] = 1. / h_array[i]; 499 } 500 return CEED_ERROR_SUCCESS; 501 } 502 503 //------------------------------------------------------------------------------ 504 // Take reciprocal of a vector on device 505 //------------------------------------------------------------------------------ 506 static int CeedDeviceReciprocal_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length) { 507 std::vector<sycl::event> e; 508 509 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()}; 510 sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) { 511 if (std::fabs(d_array[i]) > CEED_EPSILON) d_array[i] = 1. / d_array[i]; 512 }); 513 return CEED_ERROR_SUCCESS; 514 } 515 516 //------------------------------------------------------------------------------ 517 // Take reciprocal of a vector 518 //------------------------------------------------------------------------------ 519 static int CeedVectorReciprocal_Sycl(CeedVector vec) { 520 Ceed ceed; 521 Ceed_Sycl *data; 522 CeedSize length; 523 CeedVector_Sycl *impl; 524 525 CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 526 CeedCallBackend(CeedGetData(ceed, &data)); 527 CeedCallBackend(CeedDestroy(&ceed)); 528 CeedCallBackend(CeedVectorGetData(vec, &impl)); 529 CeedCallBackend(CeedVectorGetLength(vec, &length)); 530 531 // Set value for synced device/host array 532 if (impl->d_array) CeedCallBackend(CeedDeviceReciprocal_Sycl(data->sycl_queue, impl->d_array, length)); 533 if (impl->h_array) CeedCallBackend(CeedHostReciprocal_Sycl(impl->h_array, length)); 534 return CEED_ERROR_SUCCESS; 535 } 536 537 //------------------------------------------------------------------------------ 538 // Compute x = alpha x on the host 539 //------------------------------------------------------------------------------ 540 static int CeedHostScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedSize length) { 541 for (CeedSize i = 0; i < length; i++) x_array[i] *= alpha; 542 return CEED_ERROR_SUCCESS; 543 } 544 545 //------------------------------------------------------------------------------ 546 // Compute x = alpha x on device 547 //------------------------------------------------------------------------------ 548 static int CeedDeviceScale_Sycl(sycl::queue &sycl_queue, CeedScalar *x_array, CeedScalar alpha, CeedSize length) { 549 std::vector<sycl::event> e; 550 551 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()}; 552 sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) { x_array[i] *= alpha; }); 553 return CEED_ERROR_SUCCESS; 554 } 555 556 //------------------------------------------------------------------------------ 557 // Compute x = alpha x 558 //------------------------------------------------------------------------------ 559 static int CeedVectorScale_Sycl(CeedVector x, CeedScalar alpha) { 560 Ceed ceed; 561 Ceed_Sycl *data; 562 CeedSize length; 563 CeedVector_Sycl *x_impl; 564 565 CeedCallBackend(CeedVectorGetCeed(x, &ceed)); 566 CeedCallBackend(CeedGetData(ceed, &data)); 567 CeedCallBackend(CeedDestroy(&ceed)); 568 CeedCallBackend(CeedVectorGetData(x, &x_impl)); 569 CeedCallBackend(CeedVectorGetLength(x, &length)); 570 571 // Set value for synced device/host array 572 if (x_impl->d_array) CeedCallBackend(CeedDeviceScale_Sycl(data->sycl_queue, x_impl->d_array, alpha, length)); 573 if (x_impl->h_array) CeedCallBackend(CeedHostScale_Sycl(x_impl->h_array, alpha, length)); 574 return CEED_ERROR_SUCCESS; 575 } 576 577 //------------------------------------------------------------------------------ 578 // Compute y = alpha x + y on the host 579 //------------------------------------------------------------------------------ 580 static int CeedHostAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) { 581 for (CeedSize i = 0; i < length; i++) y_array[i] += alpha * x_array[i]; 582 return CEED_ERROR_SUCCESS; 583 } 584 585 //------------------------------------------------------------------------------ 586 // Compute y = alpha x + y on device 587 //------------------------------------------------------------------------------ 588 static int CeedDeviceAXPY_Sycl(sycl::queue &sycl_queue, CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) { 589 std::vector<sycl::event> e; 590 591 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()}; 592 sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) { y_array[i] += alpha * x_array[i]; }); 593 return CEED_ERROR_SUCCESS; 594 } 595 596 //------------------------------------------------------------------------------ 597 // Compute y = alpha x + y 598 //------------------------------------------------------------------------------ 599 static int CeedVectorAXPY_Sycl(CeedVector y, CeedScalar alpha, CeedVector x) { 600 Ceed ceed; 601 Ceed_Sycl *data; 602 CeedSize length; 603 CeedVector_Sycl *y_impl, *x_impl; 604 605 CeedCallBackend(CeedVectorGetCeed(y, &ceed)); 606 CeedCallBackend(CeedGetData(ceed, &data)); 607 CeedCallBackend(CeedDestroy(&ceed)); 608 CeedCallBackend(CeedVectorGetData(y, &y_impl)); 609 CeedCallBackend(CeedVectorGetData(x, &x_impl)); 610 CeedCallBackend(CeedVectorGetLength(y, &length)); 611 612 // Set value for synced device/host array 613 if (y_impl->d_array) { 614 CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE)); 615 CeedCallBackend(CeedDeviceAXPY_Sycl(data->sycl_queue, y_impl->d_array, alpha, x_impl->d_array, length)); 616 } 617 if (y_impl->h_array) { 618 CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST)); 619 CeedCallBackend(CeedHostAXPY_Sycl(y_impl->h_array, alpha, x_impl->h_array, length)); 620 } 621 return CEED_ERROR_SUCCESS; 622 } 623 624 //------------------------------------------------------------------------------ 625 // Compute the pointwise multiplication w = x .* y on the host 626 //------------------------------------------------------------------------------ 627 static int CeedHostPointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) { 628 for (CeedSize i = 0; i < length; i++) w_array[i] = x_array[i] * y_array[i]; 629 return CEED_ERROR_SUCCESS; 630 } 631 632 //------------------------------------------------------------------------------ 633 // Compute the pointwise multiplication w = x .* y on device (impl in .cu file) 634 //------------------------------------------------------------------------------ 635 static int CeedDevicePointwiseMult_Sycl(sycl::queue &sycl_queue, CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) { 636 std::vector<sycl::event> e; 637 638 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()}; 639 sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) { w_array[i] = x_array[i] * y_array[i]; }); 640 return CEED_ERROR_SUCCESS; 641 } 642 643 //------------------------------------------------------------------------------ 644 // Compute the pointwise multiplication w = x .* y 645 //------------------------------------------------------------------------------ 646 static int CeedVectorPointwiseMult_Sycl(CeedVector w, CeedVector x, CeedVector y) { 647 Ceed ceed; 648 Ceed_Sycl *data; 649 CeedSize length; 650 CeedVector_Sycl *w_impl, *x_impl, *y_impl; 651 652 CeedCallBackend(CeedVectorGetCeed(w, &ceed)); 653 CeedCallBackend(CeedGetData(ceed, &data)); 654 CeedCallBackend(CeedDestroy(&ceed)); 655 CeedCallBackend(CeedVectorGetData(w, &w_impl)); 656 CeedCallBackend(CeedVectorGetData(x, &x_impl)); 657 CeedCallBackend(CeedVectorGetData(y, &y_impl)); 658 CeedCallBackend(CeedVectorGetLength(w, &length)); 659 660 // Set value for synced device/host array 661 if (!w_impl->d_array && !w_impl->h_array) { 662 CeedCallBackend(CeedVectorSetValue(w, 0.0)); 663 } 664 if (w_impl->d_array) { 665 CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE)); 666 CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_DEVICE)); 667 CeedCallBackend(CeedDevicePointwiseMult_Sycl(data->sycl_queue, w_impl->d_array, x_impl->d_array, y_impl->d_array, length)); 668 } 669 if (w_impl->h_array) { 670 CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST)); 671 CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_HOST)); 672 CeedCallBackend(CeedHostPointwiseMult_Sycl(w_impl->h_array, x_impl->h_array, y_impl->h_array, length)); 673 } 674 return CEED_ERROR_SUCCESS; 675 } 676 677 //------------------------------------------------------------------------------ 678 // Destroy the vector 679 //------------------------------------------------------------------------------ 680 static int CeedVectorDestroy_Sycl(const CeedVector vec) { 681 Ceed ceed; 682 Ceed_Sycl *data; 683 CeedVector_Sycl *impl; 684 685 CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 686 CeedCallBackend(CeedVectorGetData(vec, &impl)); 687 CeedCallBackend(CeedGetData(ceed, &data)); 688 689 // Wait for all work to finish before freeing memory 690 CeedCallSycl(ceed, data->sycl_queue.wait_and_throw()); 691 CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context)); 692 CeedCallSycl(ceed, sycl::free(impl->reduction_norm, data->sycl_context)); 693 694 CeedCallBackend(CeedFree(&impl->h_array_owned)); 695 CeedCallBackend(CeedFree(&impl)); 696 CeedCallBackend(CeedDestroy(&ceed)); 697 return CEED_ERROR_SUCCESS; 698 } 699 700 //------------------------------------------------------------------------------ 701 // Create a vector of the specified length (does not allocate memory) 702 //------------------------------------------------------------------------------ 703 int CeedVectorCreate_Sycl(CeedSize n, CeedVector vec) { 704 Ceed ceed; 705 Ceed_Sycl *data; 706 CeedVector_Sycl *impl; 707 708 CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 709 CeedCallBackend(CeedGetData(ceed, &data)); 710 CeedCallBackend(CeedCalloc(1, &impl)); 711 CeedCallSycl(ceed, impl->reduction_norm = sycl::malloc_host<CeedScalar>(1, data->sycl_context)); 712 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasValidArray", CeedVectorHasValidArray_Sycl)); 713 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasBorrowedArrayOfType", CeedVectorHasBorrowedArrayOfType_Sycl)); 714 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Sycl)); 715 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "TakeArray", CeedVectorTakeArray_Sycl)); 716 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetValue", CeedVectorSetValue_Sycl)); 717 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SyncArray", CeedVectorSyncArray_Sycl)); 718 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArray", CeedVectorGetArray_Sycl)); 719 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Sycl)); 720 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayWrite", CeedVectorGetArrayWrite_Sycl)); 721 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Norm", CeedVectorNorm_Sycl)); 722 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Reciprocal", CeedVectorReciprocal_Sycl)); 723 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "AXPY", CeedVectorAXPY_Sycl)); 724 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Scale", CeedVectorScale_Sycl)); 725 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "PointwiseMult", CeedVectorPointwiseMult_Sycl)); 726 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Sycl)); 727 CeedCallBackend(CeedDestroy(&ceed)); 728 CeedCallBackend(CeedVectorSetData(vec, impl)); 729 return CEED_ERROR_SUCCESS; 730 } 731