1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED: http://github.com/ceed
7
8 #include <ceed/backend.h>
9 #include <ceed/ceed.h>
10
11 #include <cmath>
12 #include <string>
13 #include <sycl/sycl.hpp>
14
15 #include "ceed-sycl-ref.hpp"
16
17 //------------------------------------------------------------------------------
18 // Check if host/device sync is needed
19 //------------------------------------------------------------------------------
CeedVectorNeedSync_Sycl(const CeedVector vec,CeedMemType mem_type,bool * need_sync)20 static inline int CeedVectorNeedSync_Sycl(const CeedVector vec, CeedMemType mem_type, bool *need_sync) {
21 bool has_valid_array = false;
22 CeedVector_Sycl *impl;
23
24 CeedCallBackend(CeedVectorGetData(vec, &impl));
25 CeedCallBackend(CeedVectorHasValidArray(vec, &has_valid_array));
26 switch (mem_type) {
27 case CEED_MEM_HOST:
28 *need_sync = has_valid_array && !impl->h_array;
29 break;
30 case CEED_MEM_DEVICE:
31 *need_sync = has_valid_array && !impl->d_array;
32 break;
33 }
34 return CEED_ERROR_SUCCESS;
35 }
36
37 //------------------------------------------------------------------------------
38 // Sync host to device
39 //------------------------------------------------------------------------------
CeedVectorSyncH2D_Sycl(const CeedVector vec)40 static inline int CeedVectorSyncH2D_Sycl(const CeedVector vec) {
41 Ceed ceed;
42 Ceed_Sycl *data;
43 CeedSize length;
44 CeedVector_Sycl *impl;
45
46 CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
47 CeedCallBackend(CeedGetData(ceed, &data));
48 CeedCallBackend(CeedVectorGetData(vec, &impl));
49
50 CeedCheck(impl->h_array, ceed, CEED_ERROR_BACKEND, "No valid host data to sync to device");
51
52 CeedCallBackend(CeedVectorGetLength(vec, &length));
53 if (impl->d_array_borrowed) {
54 impl->d_array = impl->d_array_borrowed;
55 } else if (impl->d_array_owned) {
56 impl->d_array = impl->d_array_owned;
57 } else {
58 CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context));
59 impl->d_array = impl->d_array_owned;
60 }
61
62 // Copy from host to device
63 std::vector<sycl::event> e;
64
65 if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()};
66 CeedCallSycl(ceed, data->sycl_queue.copy<CeedScalar>(impl->h_array, impl->d_array, length, e).wait_and_throw());
67 CeedCallBackend(CeedDestroy(&ceed));
68 return CEED_ERROR_SUCCESS;
69 }
70
71 //------------------------------------------------------------------------------
72 // Sync device to host
73 //------------------------------------------------------------------------------
CeedVectorSyncD2H_Sycl(const CeedVector vec)74 static inline int CeedVectorSyncD2H_Sycl(const CeedVector vec) {
75 Ceed ceed;
76 Ceed_Sycl *data;
77 CeedSize length;
78 CeedVector_Sycl *impl;
79
80 CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
81 CeedCallBackend(CeedGetData(ceed, &data));
82 CeedCallBackend(CeedVectorGetData(vec, &impl));
83
84 CeedCheck(impl->d_array, ceed, CEED_ERROR_BACKEND, "No valid device data to sync to host");
85
86 CeedCallBackend(CeedVectorGetLength(vec, &length));
87 if (impl->h_array_borrowed) {
88 impl->h_array = impl->h_array_borrowed;
89 } else if (impl->h_array_owned) {
90 impl->h_array = impl->h_array_owned;
91 } else {
92 CeedCallBackend(CeedCalloc(length, &impl->h_array_owned));
93 impl->h_array = impl->h_array_owned;
94 }
95
96 // Copy from device to host
97 std::vector<sycl::event> e;
98
99 if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()};
100 CeedCallSycl(ceed, data->sycl_queue.copy<CeedScalar>(impl->d_array, impl->h_array, length, e).wait_and_throw());
101 CeedCallBackend(CeedDestroy(&ceed));
102 return CEED_ERROR_SUCCESS;
103 }
104
105 //------------------------------------------------------------------------------
106 // Sync arrays
107 //------------------------------------------------------------------------------
CeedVectorSyncArray_Sycl(const CeedVector vec,CeedMemType mem_type)108 static int CeedVectorSyncArray_Sycl(const CeedVector vec, CeedMemType mem_type) {
109 bool need_sync = false;
110
111 // Check whether device/host sync is needed
112 CeedCallBackend(CeedVectorNeedSync_Sycl(vec, mem_type, &need_sync));
113 if (!need_sync) return CEED_ERROR_SUCCESS;
114
115 switch (mem_type) {
116 case CEED_MEM_HOST:
117 return CeedVectorSyncD2H_Sycl(vec);
118 case CEED_MEM_DEVICE:
119 return CeedVectorSyncH2D_Sycl(vec);
120 }
121 return CEED_ERROR_UNSUPPORTED;
122 }
123
124 //------------------------------------------------------------------------------
125 // Set all pointers as invalid
126 //------------------------------------------------------------------------------
CeedVectorSetAllInvalid_Sycl(const CeedVector vec)127 static inline int CeedVectorSetAllInvalid_Sycl(const CeedVector vec) {
128 CeedVector_Sycl *impl;
129
130 CeedCallBackend(CeedVectorGetData(vec, &impl));
131 impl->h_array = NULL;
132 impl->d_array = NULL;
133 return CEED_ERROR_SUCCESS;
134 }
135
136 //------------------------------------------------------------------------------
137 // Check if CeedVector has any valid pointer
138 //------------------------------------------------------------------------------
CeedVectorHasValidArray_Sycl(const CeedVector vec,bool * has_valid_array)139 static inline int CeedVectorHasValidArray_Sycl(const CeedVector vec, bool *has_valid_array) {
140 CeedVector_Sycl *impl;
141
142 CeedCallBackend(CeedVectorGetData(vec, &impl));
143 *has_valid_array = impl->h_array || impl->d_array;
144 return CEED_ERROR_SUCCESS;
145 }
146
147 //------------------------------------------------------------------------------
148 // Check if has array of given type
149 //------------------------------------------------------------------------------
CeedVectorHasArrayOfType_Sycl(const CeedVector vec,CeedMemType mem_type,bool * has_array_of_type)150 static inline int CeedVectorHasArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_array_of_type) {
151 CeedVector_Sycl *impl;
152
153 CeedCallBackend(CeedVectorGetData(vec, &impl));
154 switch (mem_type) {
155 case CEED_MEM_HOST:
156 *has_array_of_type = impl->h_array_borrowed || impl->h_array_owned;
157 break;
158 case CEED_MEM_DEVICE:
159 *has_array_of_type = impl->d_array_borrowed || impl->d_array_owned;
160 break;
161 }
162 return CEED_ERROR_SUCCESS;
163 }
164
165 //------------------------------------------------------------------------------
166 // Check if has borrowed array of given type
167 //------------------------------------------------------------------------------
CeedVectorHasBorrowedArrayOfType_Sycl(const CeedVector vec,CeedMemType mem_type,bool * has_borrowed_array_of_type)168 static inline int CeedVectorHasBorrowedArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_borrowed_array_of_type) {
169 CeedVector_Sycl *impl;
170
171 CeedCallBackend(CeedVectorGetData(vec, &impl));
172 switch (mem_type) {
173 case CEED_MEM_HOST:
174 *has_borrowed_array_of_type = impl->h_array_borrowed;
175 break;
176 case CEED_MEM_DEVICE:
177 *has_borrowed_array_of_type = impl->d_array_borrowed;
178 break;
179 }
180 return CEED_ERROR_SUCCESS;
181 }
182
183 //------------------------------------------------------------------------------
184 // Set array from host
185 //------------------------------------------------------------------------------
CeedVectorSetArrayHost_Sycl(const CeedVector vec,const CeedCopyMode copy_mode,CeedScalar * array)186 static int CeedVectorSetArrayHost_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
187 CeedSize length;
188 CeedVector_Sycl *impl;
189
190 CeedCallBackend(CeedVectorGetData(vec, &impl));
191 CeedCallBackend(CeedVectorGetLength(vec, &length));
192
193 CeedCallBackend(CeedSetHostCeedScalarArray(array, copy_mode, length, (const CeedScalar **)&impl->h_array_owned,
194 (const CeedScalar **)&impl->h_array_borrowed, (const CeedScalar **)&impl->h_array));
195 return CEED_ERROR_SUCCESS;
196 }
197
198 //------------------------------------------------------------------------------
199 // Set array from device
200 //------------------------------------------------------------------------------
CeedVectorSetArrayDevice_Sycl(const CeedVector vec,const CeedCopyMode copy_mode,CeedScalar * array)201 static int CeedVectorSetArrayDevice_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
202 CeedSize length;
203 Ceed ceed;
204 Ceed_Sycl *data;
205 CeedVector_Sycl *impl;
206
207 CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
208 CeedCallBackend(CeedVectorGetData(vec, &impl));
209 CeedCallBackend(CeedGetData(ceed, &data));
210 CeedCallBackend(CeedVectorGetLength(vec, &length));
211
212 // Order queue if needed.
213 std::vector<sycl::event> e;
214
215 if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()};
216
217 switch (copy_mode) {
218 case CEED_COPY_VALUES: {
219 if (!impl->d_array_owned) {
220 CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context));
221 }
222 if (array) {
223 // Wait for copy to finish and handle exceptions.
224 CeedCallSycl(ceed, data->sycl_queue.copy<CeedScalar>(array, impl->d_array_owned, length, e).wait_and_throw());
225 }
226 impl->d_array_borrowed = NULL;
227 impl->d_array = impl->d_array_owned;
228 } break;
229 case CEED_OWN_POINTER:
230 if (impl->d_array_owned) {
231 // Wait for all work to finish before freeing memory
232 CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
233 CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
234 }
235 impl->d_array_owned = array;
236 impl->d_array_borrowed = NULL;
237 impl->d_array = impl->d_array_owned;
238 break;
239 case CEED_USE_POINTER:
240 if (impl->d_array_owned) {
241 // Wait for all work to finish before freeing memory
242 CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
243 CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
244 }
245 impl->d_array_owned = NULL;
246 impl->d_array_borrowed = array;
247 impl->d_array = impl->d_array_borrowed;
248 break;
249 }
250 CeedCallBackend(CeedDestroy(&ceed));
251 return CEED_ERROR_SUCCESS;
252 }
253
254 //------------------------------------------------------------------------------
255 // Set the array used by a vector,
256 // freeing any previously allocated array if applicable
257 //------------------------------------------------------------------------------
CeedVectorSetArray_Sycl(const CeedVector vec,const CeedMemType mem_type,const CeedCopyMode copy_mode,CeedScalar * array)258 static int CeedVectorSetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedCopyMode copy_mode, CeedScalar *array) {
259 CeedVector_Sycl *impl;
260
261 CeedCallBackend(CeedVectorGetData(vec, &impl));
262
263 CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec));
264 switch (mem_type) {
265 case CEED_MEM_HOST:
266 return CeedVectorSetArrayHost_Sycl(vec, copy_mode, array);
267 case CEED_MEM_DEVICE:
268 return CeedVectorSetArrayDevice_Sycl(vec, copy_mode, array);
269 }
270 return CEED_ERROR_UNSUPPORTED;
271 }
272
273 //------------------------------------------------------------------------------
274 // Set host array to value
275 //------------------------------------------------------------------------------
CeedHostSetValue_Sycl(CeedScalar * h_array,CeedSize length,CeedScalar val)276 static int CeedHostSetValue_Sycl(CeedScalar *h_array, CeedSize length, CeedScalar val) {
277 for (CeedSize i = 0; i < length; i++) h_array[i] = val;
278 return CEED_ERROR_SUCCESS;
279 }
280
281 //------------------------------------------------------------------------------
282 // Set device array to value
283 //------------------------------------------------------------------------------
CeedDeviceSetValue_Sycl(sycl::queue & sycl_queue,CeedScalar * d_array,CeedSize length,CeedScalar val)284 static int CeedDeviceSetValue_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length, CeedScalar val) {
285 std::vector<sycl::event> e;
286
287 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
288 sycl_queue.fill(d_array, val, length, e);
289 return CEED_ERROR_SUCCESS;
290 }
291
292 //------------------------------------------------------------------------------
293 // Set a vector to a value,
294 //------------------------------------------------------------------------------
CeedVectorSetValue_Sycl(CeedVector vec,CeedScalar val)295 static int CeedVectorSetValue_Sycl(CeedVector vec, CeedScalar val) {
296 Ceed ceed;
297 Ceed_Sycl *data;
298 CeedSize length;
299 CeedVector_Sycl *impl;
300
301 CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
302 CeedCallBackend(CeedGetData(ceed, &data));
303 CeedCallBackend(CeedDestroy(&ceed));
304 CeedCallBackend(CeedVectorGetData(vec, &impl));
305 CeedCallBackend(CeedVectorGetLength(vec, &length));
306
307 // Set value for synced device/host array
308 if (!impl->d_array && !impl->h_array) {
309 if (impl->d_array_borrowed) {
310 impl->d_array = impl->d_array_borrowed;
311 } else if (impl->h_array_borrowed) {
312 impl->h_array = impl->h_array_borrowed;
313 } else if (impl->d_array_owned) {
314 impl->d_array = impl->d_array_owned;
315 } else if (impl->h_array_owned) {
316 impl->h_array = impl->h_array_owned;
317 } else {
318 CeedCallBackend(CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL));
319 }
320 }
321 if (impl->d_array) {
322 CeedCallBackend(CeedDeviceSetValue_Sycl(data->sycl_queue, impl->d_array, length, val));
323 impl->h_array = NULL;
324 }
325 if (impl->h_array) {
326 CeedCallBackend(CeedHostSetValue_Sycl(impl->h_array, length, val));
327 impl->d_array = NULL;
328 }
329 return CEED_ERROR_SUCCESS;
330 }
331
332 //------------------------------------------------------------------------------
333 // Vector Take Array
334 //------------------------------------------------------------------------------
CeedVectorTakeArray_Sycl(CeedVector vec,CeedMemType mem_type,CeedScalar ** array)335 static int CeedVectorTakeArray_Sycl(CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
336 Ceed ceed;
337 Ceed_Sycl *data;
338 CeedVector_Sycl *impl;
339
340 CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
341 CeedCallBackend(CeedGetData(ceed, &data));
342 CeedCallBackend(CeedVectorGetData(vec, &impl));
343 CeedCallBackend(CeedDestroy(&ceed));
344 CeedCallBackend(CeedVectorGetData(vec, &impl));
345
346 // Order queue if needed
347 if (!data->sycl_queue.is_in_order()) data->sycl_queue.ext_oneapi_submit_barrier();
348
349 // Sync array to requested mem_type
350 CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
351
352 // Update pointer
353 switch (mem_type) {
354 case CEED_MEM_HOST:
355 (*array) = impl->h_array_borrowed;
356 impl->h_array_borrowed = NULL;
357 impl->h_array = NULL;
358 break;
359 case CEED_MEM_DEVICE:
360 (*array) = impl->d_array_borrowed;
361 impl->d_array_borrowed = NULL;
362 impl->d_array = NULL;
363 break;
364 }
365 return CEED_ERROR_SUCCESS;
366 }
367
368 //------------------------------------------------------------------------------
369 // Core logic for array syncronization for GetArray.
370 // If a different memory type is most up to date, this will perform a copy
371 //------------------------------------------------------------------------------
CeedVectorGetArrayCore_Sycl(const CeedVector vec,const CeedMemType mem_type,CeedScalar ** array)372 static int CeedVectorGetArrayCore_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
373 CeedVector_Sycl *impl;
374
375 CeedCallBackend(CeedVectorGetData(vec, &impl));
376
377 // Sync array to requested mem_type
378 CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
379
380 // Update pointer
381 switch (mem_type) {
382 case CEED_MEM_HOST:
383 *array = impl->h_array;
384 break;
385 case CEED_MEM_DEVICE:
386 *array = impl->d_array;
387 break;
388 }
389 return CEED_ERROR_SUCCESS;
390 }
391
392 //------------------------------------------------------------------------------
393 // Get read-only access to a vector via the specified mem_type
394 //------------------------------------------------------------------------------
CeedVectorGetArrayRead_Sycl(const CeedVector vec,const CeedMemType mem_type,const CeedScalar ** array)395 static int CeedVectorGetArrayRead_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedScalar **array) {
396 return CeedVectorGetArrayCore_Sycl(vec, mem_type, (CeedScalar **)array);
397 }
398
399 //------------------------------------------------------------------------------
400 // Get read/write access to a vector via the specified mem_type
401 //------------------------------------------------------------------------------
CeedVectorGetArray_Sycl(const CeedVector vec,const CeedMemType mem_type,CeedScalar ** array)402 static int CeedVectorGetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
403 CeedVector_Sycl *impl;
404
405 CeedCallBackend(CeedVectorGetData(vec, &impl));
406 CeedCallBackend(CeedVectorGetArrayCore_Sycl(vec, mem_type, array));
407 CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec));
408 switch (mem_type) {
409 case CEED_MEM_HOST:
410 impl->h_array = *array;
411 break;
412 case CEED_MEM_DEVICE:
413 impl->d_array = *array;
414 break;
415 }
416 return CEED_ERROR_SUCCESS;
417 }
418
419 //------------------------------------------------------------------------------
420 // Get write access to a vector via the specified mem_type
421 //------------------------------------------------------------------------------
CeedVectorGetArrayWrite_Sycl(const CeedVector vec,const CeedMemType mem_type,CeedScalar ** array)422 static int CeedVectorGetArrayWrite_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
423 bool has_array_of_type = true;
424 CeedVector_Sycl *impl;
425
426 CeedCallBackend(CeedVectorGetData(vec, &impl));
427 CeedCallBackend(CeedVectorHasArrayOfType_Sycl(vec, mem_type, &has_array_of_type));
428 if (!has_array_of_type) {
429 // Allocate if array is not yet allocated
430 CeedCallBackend(CeedVectorSetArray(vec, mem_type, CEED_COPY_VALUES, NULL));
431 } else {
432 // Select dirty array
433 switch (mem_type) {
434 case CEED_MEM_HOST:
435 if (impl->h_array_borrowed) impl->h_array = impl->h_array_borrowed;
436 else impl->h_array = impl->h_array_owned;
437 break;
438 case CEED_MEM_DEVICE:
439 if (impl->d_array_borrowed) impl->d_array = impl->d_array_borrowed;
440 else impl->d_array = impl->d_array_owned;
441 }
442 }
443 return CeedVectorGetArray_Sycl(vec, mem_type, array);
444 }
445
446 //------------------------------------------------------------------------------
447 // Get the norm of a CeedVector
448 //------------------------------------------------------------------------------
CeedVectorNorm_Sycl(CeedVector vec,CeedNormType type,CeedScalar * norm)449 static int CeedVectorNorm_Sycl(CeedVector vec, CeedNormType type, CeedScalar *norm) {
450 Ceed ceed;
451 Ceed_Sycl *data;
452 CeedSize length;
453 const CeedScalar *d_array;
454 CeedVector_Sycl *impl;
455
456 CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
457 CeedCallBackend(CeedGetData(ceed, &data));
458 CeedCallBackend(CeedDestroy(&ceed));
459 CeedCallBackend(CeedVectorGetData(vec, &impl));
460 CeedCallBackend(CeedVectorGetLength(vec, &length));
461
462 // Compute norm
463 CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &d_array));
464
465 std::vector<sycl::event> e;
466
467 if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()};
468
469 switch (type) {
470 case CEED_NORM_1: {
471 // Order queue
472 auto sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}});
473 data->sycl_queue.parallel_for(length, e, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += abs(d_array[i]); }).wait_and_throw();
474 } break;
475 case CEED_NORM_2: {
476 // Order queue
477 auto sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}});
478 data->sycl_queue.parallel_for(length, e, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += (d_array[i] * d_array[i]); }).wait_and_throw();
479 } break;
480 case CEED_NORM_MAX: {
481 // Order queue
482 auto maxReduction = sycl::reduction(impl->reduction_norm, sycl::maximum<>(), {sycl::property::reduction::initialize_to_identity{}});
483 data->sycl_queue.parallel_for(length, e, maxReduction, [=](sycl::id<1> i, auto &max) { max.combine(abs(d_array[i])); }).wait_and_throw();
484 } break;
485 }
486 // L2 norm - square root over reduced value
487 if (type == CEED_NORM_2) *norm = sqrt(*impl->reduction_norm);
488 else *norm = *impl->reduction_norm;
489 CeedCallBackend(CeedVectorRestoreArrayRead(vec, &d_array));
490 return CEED_ERROR_SUCCESS;
491 }
492
493 //------------------------------------------------------------------------------
494 // Take reciprocal of a vector on host
495 //------------------------------------------------------------------------------
CeedHostReciprocal_Sycl(CeedScalar * h_array,CeedSize length)496 static int CeedHostReciprocal_Sycl(CeedScalar *h_array, CeedSize length) {
497 for (CeedSize i = 0; i < length; i++) {
498 if (std::fabs(h_array[i]) > CEED_EPSILON) h_array[i] = 1. / h_array[i];
499 }
500 return CEED_ERROR_SUCCESS;
501 }
502
503 //------------------------------------------------------------------------------
504 // Take reciprocal of a vector on device
505 //------------------------------------------------------------------------------
CeedDeviceReciprocal_Sycl(sycl::queue & sycl_queue,CeedScalar * d_array,CeedSize length)506 static int CeedDeviceReciprocal_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length) {
507 std::vector<sycl::event> e;
508
509 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
510 sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) {
511 if (std::fabs(d_array[i]) > CEED_EPSILON) d_array[i] = 1. / d_array[i];
512 });
513 return CEED_ERROR_SUCCESS;
514 }
515
516 //------------------------------------------------------------------------------
517 // Take reciprocal of a vector
518 //------------------------------------------------------------------------------
CeedVectorReciprocal_Sycl(CeedVector vec)519 static int CeedVectorReciprocal_Sycl(CeedVector vec) {
520 Ceed ceed;
521 Ceed_Sycl *data;
522 CeedSize length;
523 CeedVector_Sycl *impl;
524
525 CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
526 CeedCallBackend(CeedGetData(ceed, &data));
527 CeedCallBackend(CeedDestroy(&ceed));
528 CeedCallBackend(CeedVectorGetData(vec, &impl));
529 CeedCallBackend(CeedVectorGetLength(vec, &length));
530
531 // Set value for synced device/host array
532 if (impl->d_array) CeedCallBackend(CeedDeviceReciprocal_Sycl(data->sycl_queue, impl->d_array, length));
533 if (impl->h_array) CeedCallBackend(CeedHostReciprocal_Sycl(impl->h_array, length));
534 return CEED_ERROR_SUCCESS;
535 }
536
537 //------------------------------------------------------------------------------
538 // Compute x = alpha x on the host
539 //------------------------------------------------------------------------------
CeedHostScale_Sycl(CeedScalar * x_array,CeedScalar alpha,CeedSize length)540 static int CeedHostScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
541 for (CeedSize i = 0; i < length; i++) x_array[i] *= alpha;
542 return CEED_ERROR_SUCCESS;
543 }
544
545 //------------------------------------------------------------------------------
546 // Compute x = alpha x on device
547 //------------------------------------------------------------------------------
CeedDeviceScale_Sycl(sycl::queue & sycl_queue,CeedScalar * x_array,CeedScalar alpha,CeedSize length)548 static int CeedDeviceScale_Sycl(sycl::queue &sycl_queue, CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
549 std::vector<sycl::event> e;
550
551 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
552 sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) { x_array[i] *= alpha; });
553 return CEED_ERROR_SUCCESS;
554 }
555
556 //------------------------------------------------------------------------------
557 // Compute x = alpha x
558 //------------------------------------------------------------------------------
CeedVectorScale_Sycl(CeedVector x,CeedScalar alpha)559 static int CeedVectorScale_Sycl(CeedVector x, CeedScalar alpha) {
560 Ceed ceed;
561 Ceed_Sycl *data;
562 CeedSize length;
563 CeedVector_Sycl *x_impl;
564
565 CeedCallBackend(CeedVectorGetCeed(x, &ceed));
566 CeedCallBackend(CeedGetData(ceed, &data));
567 CeedCallBackend(CeedDestroy(&ceed));
568 CeedCallBackend(CeedVectorGetData(x, &x_impl));
569 CeedCallBackend(CeedVectorGetLength(x, &length));
570
571 // Set value for synced device/host array
572 if (x_impl->d_array) CeedCallBackend(CeedDeviceScale_Sycl(data->sycl_queue, x_impl->d_array, alpha, length));
573 if (x_impl->h_array) CeedCallBackend(CeedHostScale_Sycl(x_impl->h_array, alpha, length));
574 return CEED_ERROR_SUCCESS;
575 }
576
577 //------------------------------------------------------------------------------
578 // Compute y = alpha x + y on the host
579 //------------------------------------------------------------------------------
CeedHostAXPY_Sycl(CeedScalar * y_array,CeedScalar alpha,CeedScalar * x_array,CeedSize length)580 static int CeedHostAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
581 for (CeedSize i = 0; i < length; i++) y_array[i] += alpha * x_array[i];
582 return CEED_ERROR_SUCCESS;
583 }
584
585 //------------------------------------------------------------------------------
586 // Compute y = alpha x + y on device
587 //------------------------------------------------------------------------------
CeedDeviceAXPY_Sycl(sycl::queue & sycl_queue,CeedScalar * y_array,CeedScalar alpha,CeedScalar * x_array,CeedSize length)588 static int CeedDeviceAXPY_Sycl(sycl::queue &sycl_queue, CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
589 std::vector<sycl::event> e;
590
591 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
592 sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) { y_array[i] += alpha * x_array[i]; });
593 return CEED_ERROR_SUCCESS;
594 }
595
596 //------------------------------------------------------------------------------
597 // Compute y = alpha x + y
598 //------------------------------------------------------------------------------
CeedVectorAXPY_Sycl(CeedVector y,CeedScalar alpha,CeedVector x)599 static int CeedVectorAXPY_Sycl(CeedVector y, CeedScalar alpha, CeedVector x) {
600 Ceed ceed;
601 Ceed_Sycl *data;
602 CeedSize length;
603 CeedVector_Sycl *y_impl, *x_impl;
604
605 CeedCallBackend(CeedVectorGetCeed(y, &ceed));
606 CeedCallBackend(CeedGetData(ceed, &data));
607 CeedCallBackend(CeedDestroy(&ceed));
608 CeedCallBackend(CeedVectorGetData(y, &y_impl));
609 CeedCallBackend(CeedVectorGetData(x, &x_impl));
610 CeedCallBackend(CeedVectorGetLength(y, &length));
611
612 // Set value for synced device/host array
613 if (y_impl->d_array) {
614 CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
615 CeedCallBackend(CeedDeviceAXPY_Sycl(data->sycl_queue, y_impl->d_array, alpha, x_impl->d_array, length));
616 }
617 if (y_impl->h_array) {
618 CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
619 CeedCallBackend(CeedHostAXPY_Sycl(y_impl->h_array, alpha, x_impl->h_array, length));
620 }
621 return CEED_ERROR_SUCCESS;
622 }
623
624 //------------------------------------------------------------------------------
625 // Compute the pointwise multiplication w = x .* y on the host
626 //------------------------------------------------------------------------------
CeedHostPointwiseMult_Sycl(CeedScalar * w_array,CeedScalar * x_array,CeedScalar * y_array,CeedSize length)627 static int CeedHostPointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
628 for (CeedSize i = 0; i < length; i++) w_array[i] = x_array[i] * y_array[i];
629 return CEED_ERROR_SUCCESS;
630 }
631
632 //------------------------------------------------------------------------------
633 // Compute the pointwise multiplication w = x .* y on device (impl in .cu file)
634 //------------------------------------------------------------------------------
CeedDevicePointwiseMult_Sycl(sycl::queue & sycl_queue,CeedScalar * w_array,CeedScalar * x_array,CeedScalar * y_array,CeedSize length)635 static int CeedDevicePointwiseMult_Sycl(sycl::queue &sycl_queue, CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
636 std::vector<sycl::event> e;
637
638 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
639 sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) { w_array[i] = x_array[i] * y_array[i]; });
640 return CEED_ERROR_SUCCESS;
641 }
642
643 //------------------------------------------------------------------------------
644 // Compute the pointwise multiplication w = x .* y
645 //------------------------------------------------------------------------------
CeedVectorPointwiseMult_Sycl(CeedVector w,CeedVector x,CeedVector y)646 static int CeedVectorPointwiseMult_Sycl(CeedVector w, CeedVector x, CeedVector y) {
647 Ceed ceed;
648 Ceed_Sycl *data;
649 CeedSize length;
650 CeedVector_Sycl *w_impl, *x_impl, *y_impl;
651
652 CeedCallBackend(CeedVectorGetCeed(w, &ceed));
653 CeedCallBackend(CeedGetData(ceed, &data));
654 CeedCallBackend(CeedDestroy(&ceed));
655 CeedCallBackend(CeedVectorGetData(w, &w_impl));
656 CeedCallBackend(CeedVectorGetData(x, &x_impl));
657 CeedCallBackend(CeedVectorGetData(y, &y_impl));
658 CeedCallBackend(CeedVectorGetLength(w, &length));
659
660 // Set value for synced device/host array
661 if (!w_impl->d_array && !w_impl->h_array) {
662 CeedCallBackend(CeedVectorSetValue(w, 0.0));
663 }
664 if (w_impl->d_array) {
665 CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
666 CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_DEVICE));
667 CeedCallBackend(CeedDevicePointwiseMult_Sycl(data->sycl_queue, w_impl->d_array, x_impl->d_array, y_impl->d_array, length));
668 }
669 if (w_impl->h_array) {
670 CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
671 CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_HOST));
672 CeedCallBackend(CeedHostPointwiseMult_Sycl(w_impl->h_array, x_impl->h_array, y_impl->h_array, length));
673 }
674 return CEED_ERROR_SUCCESS;
675 }
676
677 //------------------------------------------------------------------------------
678 // Destroy the vector
679 //------------------------------------------------------------------------------
CeedVectorDestroy_Sycl(const CeedVector vec)680 static int CeedVectorDestroy_Sycl(const CeedVector vec) {
681 Ceed ceed;
682 Ceed_Sycl *data;
683 CeedVector_Sycl *impl;
684
685 CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
686 CeedCallBackend(CeedVectorGetData(vec, &impl));
687 CeedCallBackend(CeedGetData(ceed, &data));
688
689 // Wait for all work to finish before freeing memory
690 CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
691 CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
692 CeedCallSycl(ceed, sycl::free(impl->reduction_norm, data->sycl_context));
693
694 CeedCallBackend(CeedFree(&impl->h_array_owned));
695 CeedCallBackend(CeedFree(&impl));
696 CeedCallBackend(CeedDestroy(&ceed));
697 return CEED_ERROR_SUCCESS;
698 }
699
700 //------------------------------------------------------------------------------
701 // Create a vector of the specified length (does not allocate memory)
702 //------------------------------------------------------------------------------
CeedVectorCreate_Sycl(CeedSize n,CeedVector vec)703 int CeedVectorCreate_Sycl(CeedSize n, CeedVector vec) {
704 Ceed ceed;
705 Ceed_Sycl *data;
706 CeedVector_Sycl *impl;
707
708 CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
709 CeedCallBackend(CeedGetData(ceed, &data));
710 CeedCallBackend(CeedCalloc(1, &impl));
711 CeedCallSycl(ceed, impl->reduction_norm = sycl::malloc_host<CeedScalar>(1, data->sycl_context));
712 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasValidArray", CeedVectorHasValidArray_Sycl));
713 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasBorrowedArrayOfType", CeedVectorHasBorrowedArrayOfType_Sycl));
714 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Sycl));
715 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "TakeArray", CeedVectorTakeArray_Sycl));
716 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetValue", CeedVectorSetValue_Sycl));
717 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SyncArray", CeedVectorSyncArray_Sycl));
718 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArray", CeedVectorGetArray_Sycl));
719 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Sycl));
720 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayWrite", CeedVectorGetArrayWrite_Sycl));
721 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Norm", CeedVectorNorm_Sycl));
722 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Reciprocal", CeedVectorReciprocal_Sycl));
723 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "AXPY", CeedVectorAXPY_Sycl));
724 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Scale", CeedVectorScale_Sycl));
725 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "PointwiseMult", CeedVectorPointwiseMult_Sycl));
726 CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Sycl));
727 CeedCallBackend(CeedDestroy(&ceed));
728 CeedCallBackend(CeedVectorSetData(vec, impl));
729 return CEED_ERROR_SUCCESS;
730 }
731