xref: /libCEED/backends/sycl-ref/ceed-sycl-vector.sycl.cpp (revision 8330fa8358e0538fd6980df480dcd5567c9ca368)
1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/backend.h>
9 #include <ceed/ceed.h>
10 
11 #include <cmath>
12 #include <string>
13 #include <sycl/sycl.hpp>
14 
15 #include "ceed-sycl-ref.hpp"
16 
17 //------------------------------------------------------------------------------
18 // Check if host/device sync is needed
19 //------------------------------------------------------------------------------
20 static inline int CeedVectorNeedSync_Sycl(const CeedVector vec, CeedMemType mem_type, bool *need_sync) {
21   bool             has_valid_array = false;
22   CeedVector_Sycl *impl;
23 
24   CeedCallBackend(CeedVectorGetData(vec, &impl));
25   CeedCallBackend(CeedVectorHasValidArray(vec, &has_valid_array));
26   switch (mem_type) {
27     case CEED_MEM_HOST:
28       *need_sync = has_valid_array && !impl->h_array;
29       break;
30     case CEED_MEM_DEVICE:
31       *need_sync = has_valid_array && !impl->d_array;
32       break;
33   }
34   return CEED_ERROR_SUCCESS;
35 }
36 
37 //------------------------------------------------------------------------------
38 // Sync host to device
39 //------------------------------------------------------------------------------
40 static inline int CeedVectorSyncH2D_Sycl(const CeedVector vec) {
41   Ceed             ceed;
42   Ceed_Sycl       *data;
43   CeedSize         length;
44   CeedVector_Sycl *impl;
45 
46   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
47   CeedCallBackend(CeedVectorGetData(vec, &impl));
48   CeedCallBackend(CeedGetData(ceed, &data));
49   CeedCheck(impl->h_array, ceed, CEED_ERROR_BACKEND, "No valid host data to sync to device");
50 
51   CeedCallBackend(CeedVectorGetLength(vec, &length));
52   if (impl->d_array_borrowed) {
53     impl->d_array = impl->d_array_borrowed;
54   } else if (impl->d_array_owned) {
55     impl->d_array = impl->d_array_owned;
56   } else {
57     CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context));
58     impl->d_array = impl->d_array_owned;
59   }
60 
61   sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier();
62   // Copy from host to device
63   sycl::event copy_event = data->sycl_queue.copy<CeedScalar>(impl->h_array, impl->d_array, length, {e});
64   // Wait for copy to finish and handle exceptions.
65   CeedCallSycl(ceed, copy_event.wait_and_throw());
66   return CEED_ERROR_SUCCESS;
67 }
68 
69 //------------------------------------------------------------------------------
70 // Sync device to host
71 //------------------------------------------------------------------------------
72 static inline int CeedVectorSyncD2H_Sycl(const CeedVector vec) {
73   Ceed             ceed;
74   Ceed_Sycl       *data;
75   CeedSize         length;
76   CeedVector_Sycl *impl;
77 
78   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
79   CeedCallBackend(CeedVectorGetData(vec, &impl));
80   CeedCallBackend(CeedGetData(ceed, &data));
81 
82   CeedCheck(impl->d_array, ceed, CEED_ERROR_BACKEND, "No valid device data to sync to host");
83 
84   CeedCallBackend(CeedVectorGetLength(vec, &length));
85   if (impl->h_array_borrowed) {
86     impl->h_array = impl->h_array_borrowed;
87   } else if (impl->h_array_owned) {
88     impl->h_array = impl->h_array_owned;
89   } else {
90     CeedCallBackend(CeedCalloc(length, &impl->h_array_owned));
91     impl->h_array = impl->h_array_owned;
92   }
93 
94   // Order queue
95   sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier();
96   // Copy from device to host
97   sycl::event copy_event = data->sycl_queue.copy<CeedScalar>(impl->d_array, impl->h_array, length, {e});
98   // Wait for copy to finish and handle exceptions.
99   CeedCallSycl(ceed, copy_event.wait_and_throw());
100   return CEED_ERROR_SUCCESS;
101 }
102 
103 //------------------------------------------------------------------------------
104 // Sync arrays
105 //------------------------------------------------------------------------------
106 static int CeedVectorSyncArray_Sycl(const CeedVector vec, CeedMemType mem_type) {
107   bool need_sync = false;
108 
109   // Check whether device/host sync is needed
110   CeedCallBackend(CeedVectorNeedSync_Sycl(vec, mem_type, &need_sync));
111   if (!need_sync) return CEED_ERROR_SUCCESS;
112 
113   switch (mem_type) {
114     case CEED_MEM_HOST:
115       return CeedVectorSyncD2H_Sycl(vec);
116     case CEED_MEM_DEVICE:
117       return CeedVectorSyncH2D_Sycl(vec);
118   }
119   return CEED_ERROR_UNSUPPORTED;
120 }
121 
122 //------------------------------------------------------------------------------
123 // Set all pointers as invalid
124 //------------------------------------------------------------------------------
125 static inline int CeedVectorSetAllInvalid_Sycl(const CeedVector vec) {
126   CeedVector_Sycl *impl;
127 
128   CeedCallBackend(CeedVectorGetData(vec, &impl));
129   impl->h_array = NULL;
130   impl->d_array = NULL;
131   return CEED_ERROR_SUCCESS;
132 }
133 
134 //------------------------------------------------------------------------------
135 // Check if CeedVector has any valid pointer
136 //------------------------------------------------------------------------------
137 static inline int CeedVectorHasValidArray_Sycl(const CeedVector vec, bool *has_valid_array) {
138   CeedVector_Sycl *impl;
139 
140   CeedCallBackend(CeedVectorGetData(vec, &impl));
141   *has_valid_array = impl->h_array || impl->d_array;
142   return CEED_ERROR_SUCCESS;
143 }
144 
145 //------------------------------------------------------------------------------
146 // Check if has array of given type
147 //------------------------------------------------------------------------------
148 static inline int CeedVectorHasArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_array_of_type) {
149   CeedVector_Sycl *impl;
150 
151   CeedCallBackend(CeedVectorGetData(vec, &impl));
152   switch (mem_type) {
153     case CEED_MEM_HOST:
154       *has_array_of_type = impl->h_array_borrowed || impl->h_array_owned;
155       break;
156     case CEED_MEM_DEVICE:
157       *has_array_of_type = impl->d_array_borrowed || impl->d_array_owned;
158       break;
159   }
160   return CEED_ERROR_SUCCESS;
161 }
162 
163 //------------------------------------------------------------------------------
164 // Check if has borrowed array of given type
165 //------------------------------------------------------------------------------
166 static inline int CeedVectorHasBorrowedArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_borrowed_array_of_type) {
167   CeedVector_Sycl *impl;
168 
169   CeedCallBackend(CeedVectorGetData(vec, &impl));
170   switch (mem_type) {
171     case CEED_MEM_HOST:
172       *has_borrowed_array_of_type = impl->h_array_borrowed;
173       break;
174     case CEED_MEM_DEVICE:
175       *has_borrowed_array_of_type = impl->d_array_borrowed;
176       break;
177   }
178   return CEED_ERROR_SUCCESS;
179 }
180 
181 //------------------------------------------------------------------------------
182 // Set array from host
183 //------------------------------------------------------------------------------
184 static int CeedVectorSetArrayHost_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
185   CeedSize         length;
186   CeedVector_Sycl *impl;
187 
188   CeedCallBackend(CeedVectorGetData(vec, &impl));
189   CeedCallBackend(CeedVectorGetLength(vec, &length));
190 
191   CeedCallBackend(CeedSetHostCeedScalarArray(array, copy_mode, length, (const CeedScalar **)&impl->h_array_owned,
192                                              (const CeedScalar **)&impl->h_array_borrowed, (const CeedScalar **)&impl->h_array));
193   return CEED_ERROR_SUCCESS;
194 }
195 
196 //------------------------------------------------------------------------------
197 // Set array from device
198 //------------------------------------------------------------------------------
199 static int CeedVectorSetArrayDevice_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
200   CeedSize         length;
201   Ceed             ceed;
202   Ceed_Sycl       *data;
203   CeedVector_Sycl *impl;
204 
205   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
206   CeedCallBackend(CeedVectorGetData(vec, &impl));
207   CeedCallBackend(CeedGetData(ceed, &data));
208   CeedCallBackend(CeedVectorGetLength(vec, &length));
209 
210   // Order queue
211   sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier();
212 
213   switch (copy_mode) {
214     case CEED_COPY_VALUES: {
215       if (!impl->d_array_owned) {
216         CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context));
217       }
218       if (array) {
219         sycl::event copy_event = data->sycl_queue.copy<CeedScalar>(array, impl->d_array_owned, length, {e});
220         // Wait for copy to finish and handle exceptions.
221         CeedCallSycl(ceed, copy_event.wait_and_throw());
222       }
223       impl->d_array_borrowed = NULL;
224       impl->d_array          = impl->d_array_owned;
225     } break;
226     case CEED_OWN_POINTER:
227       if (impl->d_array_owned) {
228         // Wait for all work to finish before freeing memory
229         CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
230         CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
231       }
232       impl->d_array_owned    = array;
233       impl->d_array_borrowed = NULL;
234       impl->d_array          = impl->d_array_owned;
235       break;
236     case CEED_USE_POINTER:
237       if (impl->d_array_owned) {
238         // Wait for all work to finish before freeing memory
239         CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
240         CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
241       }
242       impl->d_array_owned    = NULL;
243       impl->d_array_borrowed = array;
244       impl->d_array          = impl->d_array_borrowed;
245       break;
246   }
247   return CEED_ERROR_SUCCESS;
248 }
249 
250 //------------------------------------------------------------------------------
251 // Set the array used by a vector,
252 //   freeing any previously allocated array if applicable
253 //------------------------------------------------------------------------------
254 static int CeedVectorSetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedCopyMode copy_mode, CeedScalar *array) {
255   CeedVector_Sycl *impl;
256 
257   CeedCallBackend(CeedVectorGetData(vec, &impl));
258 
259   CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec));
260   switch (mem_type) {
261     case CEED_MEM_HOST:
262       return CeedVectorSetArrayHost_Sycl(vec, copy_mode, array);
263     case CEED_MEM_DEVICE:
264       return CeedVectorSetArrayDevice_Sycl(vec, copy_mode, array);
265   }
266   return CEED_ERROR_UNSUPPORTED;
267 }
268 
269 //------------------------------------------------------------------------------
270 // Set host array to value
271 //------------------------------------------------------------------------------
272 static int CeedHostSetValue_Sycl(CeedScalar *h_array, CeedSize length, CeedScalar val) {
273   for (CeedSize i = 0; i < length; i++) h_array[i] = val;
274   return CEED_ERROR_SUCCESS;
275 }
276 
277 //------------------------------------------------------------------------------
278 // Set device array to value
279 //------------------------------------------------------------------------------
280 static int CeedDeviceSetValue_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length, CeedScalar val) {
281   // Order queue
282   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
283   sycl_queue.fill(d_array, val, length, {e});
284   return CEED_ERROR_SUCCESS;
285 }
286 
287 //------------------------------------------------------------------------------
// Set a vector to a value
289 //------------------------------------------------------------------------------
290 static int CeedVectorSetValue_Sycl(CeedVector vec, CeedScalar val) {
291   Ceed             ceed;
292   Ceed_Sycl       *data;
293   CeedSize         length;
294   CeedVector_Sycl *impl;
295 
296   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
297   CeedCallBackend(CeedVectorGetData(vec, &impl));
298   CeedCallBackend(CeedVectorGetLength(vec, &length));
299   CeedCallBackend(CeedGetData(ceed, &data));
300 
301   // Set value for synced device/host array
302   if (!impl->d_array && !impl->h_array) {
303     if (impl->d_array_borrowed) {
304       impl->d_array = impl->d_array_borrowed;
305     } else if (impl->h_array_borrowed) {
306       impl->h_array = impl->h_array_borrowed;
307     } else if (impl->d_array_owned) {
308       impl->d_array = impl->d_array_owned;
309     } else if (impl->h_array_owned) {
310       impl->h_array = impl->h_array_owned;
311     } else {
312       CeedCallBackend(CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL));
313     }
314   }
315   if (impl->d_array) {
316     CeedCallBackend(CeedDeviceSetValue_Sycl(data->sycl_queue, impl->d_array, length, val));
317     impl->h_array = NULL;
318   }
319   if (impl->h_array) {
320     CeedCallBackend(CeedHostSetValue_Sycl(impl->h_array, length, val));
321     impl->d_array = NULL;
322   }
323   return CEED_ERROR_SUCCESS;
324 }
325 
326 //------------------------------------------------------------------------------
327 // Vector Take Array
328 //------------------------------------------------------------------------------
329 static int CeedVectorTakeArray_Sycl(CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
330   Ceed             ceed;
331   Ceed_Sycl       *data;
332   CeedVector_Sycl *impl;
333 
334   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
335   CeedCallBackend(CeedVectorGetData(vec, &impl));
336   CeedCallBackend(CeedGetData(ceed, &data));
337 
338   // Order queue
339   data->sycl_queue.ext_oneapi_submit_barrier();
340 
341   // Sync array to requested mem_type
342   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
343 
344   // Update pointer
345   switch (mem_type) {
346     case CEED_MEM_HOST:
347       (*array)               = impl->h_array_borrowed;
348       impl->h_array_borrowed = NULL;
349       impl->h_array          = NULL;
350       break;
351     case CEED_MEM_DEVICE:
352       (*array)               = impl->d_array_borrowed;
353       impl->d_array_borrowed = NULL;
354       impl->d_array          = NULL;
355       break;
356   }
357   return CEED_ERROR_SUCCESS;
358 }
359 
360 //------------------------------------------------------------------------------
// Core logic for array synchronization for GetArray.
362 //   If a different memory type is most up to date, this will perform a copy
363 //------------------------------------------------------------------------------
364 static int CeedVectorGetArrayCore_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
365   CeedVector_Sycl *impl;
366 
367   CeedCallBackend(CeedVectorGetData(vec, &impl));
368 
369   // Sync array to requested mem_type
370   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
371 
372   // Update pointer
373   switch (mem_type) {
374     case CEED_MEM_HOST:
375       *array = impl->h_array;
376       break;
377     case CEED_MEM_DEVICE:
378       *array = impl->d_array;
379       break;
380   }
381   return CEED_ERROR_SUCCESS;
382 }
383 
384 //------------------------------------------------------------------------------
385 // Get read-only access to a vector via the specified mem_type
386 //------------------------------------------------------------------------------
387 static int CeedVectorGetArrayRead_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedScalar **array) {
388   return CeedVectorGetArrayCore_Sycl(vec, mem_type, (CeedScalar **)array);
389 }
390 
391 //------------------------------------------------------------------------------
392 // Get read/write access to a vector via the specified mem_type
393 //------------------------------------------------------------------------------
394 static int CeedVectorGetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
395   CeedVector_Sycl *impl;
396 
397   CeedCallBackend(CeedVectorGetData(vec, &impl));
398   CeedCallBackend(CeedVectorGetArrayCore_Sycl(vec, mem_type, array));
399   CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec));
400   switch (mem_type) {
401     case CEED_MEM_HOST:
402       impl->h_array = *array;
403       break;
404     case CEED_MEM_DEVICE:
405       impl->d_array = *array;
406       break;
407   }
408   return CEED_ERROR_SUCCESS;
409 }
410 
411 //------------------------------------------------------------------------------
412 // Get write access to a vector via the specified mem_type
413 //------------------------------------------------------------------------------
414 static int CeedVectorGetArrayWrite_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
415   bool             has_array_of_type = true;
416   CeedVector_Sycl *impl;
417 
418   CeedCallBackend(CeedVectorGetData(vec, &impl));
419   CeedCallBackend(CeedVectorHasArrayOfType_Sycl(vec, mem_type, &has_array_of_type));
420   if (!has_array_of_type) {
421     // Allocate if array is not yet allocated
422     CeedCallBackend(CeedVectorSetArray(vec, mem_type, CEED_COPY_VALUES, NULL));
423   } else {
424     // Select dirty array
425     switch (mem_type) {
426       case CEED_MEM_HOST:
427         if (impl->h_array_borrowed) impl->h_array = impl->h_array_borrowed;
428         else impl->h_array = impl->h_array_owned;
429         break;
430       case CEED_MEM_DEVICE:
431         if (impl->d_array_borrowed) impl->d_array = impl->d_array_borrowed;
432         else impl->d_array = impl->d_array_owned;
433     }
434   }
435   return CeedVectorGetArray_Sycl(vec, mem_type, array);
436 }
437 
438 //------------------------------------------------------------------------------
439 // Get the norm of a CeedVector
440 //------------------------------------------------------------------------------
441 static int CeedVectorNorm_Sycl(CeedVector vec, CeedNormType type, CeedScalar *norm) {
442   Ceed              ceed;
443   Ceed_Sycl        *data;
444   CeedSize          length;
445   const CeedScalar *d_array;
446   CeedVector_Sycl  *impl;
447 
448   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
449   CeedCallBackend(CeedVectorGetData(vec, &impl));
450   CeedCallBackend(CeedVectorGetLength(vec, &length));
451   CeedCallBackend(CeedGetData(ceed, &data));
452 
453   // Compute norm
454   CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &d_array));
455   switch (type) {
456     case CEED_NORM_1: {
457       // Order queue
458       sycl::event e            = data->sycl_queue.ext_oneapi_submit_barrier();
459       auto        sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}});
460       data->sycl_queue.parallel_for(length, {e}, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += abs(d_array[i]); }).wait_and_throw();
461     } break;
462     case CEED_NORM_2: {
463       // Order queue
464       sycl::event e            = data->sycl_queue.ext_oneapi_submit_barrier();
465       auto        sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}});
466       data->sycl_queue.parallel_for(length, {e}, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += (d_array[i] * d_array[i]); }).wait_and_throw();
467     } break;
468     case CEED_NORM_MAX: {
469       // Order queue
470       sycl::event e            = data->sycl_queue.ext_oneapi_submit_barrier();
471       auto        maxReduction = sycl::reduction(impl->reduction_norm, sycl::maximum<>(), {sycl::property::reduction::initialize_to_identity{}});
472       data->sycl_queue.parallel_for(length, {e}, maxReduction, [=](sycl::id<1> i, auto &max) { max.combine(abs(d_array[i])); }).wait_and_throw();
473     } break;
474   }
475   // L2 norm - square root over reduced value
476   if (type == CEED_NORM_2) *norm = sqrt(*impl->reduction_norm);
477   else *norm = *impl->reduction_norm;
478   CeedCallBackend(CeedVectorRestoreArrayRead(vec, &d_array));
479   return CEED_ERROR_SUCCESS;
480 }
481 
482 //------------------------------------------------------------------------------
483 // Take reciprocal of a vector on host
484 //------------------------------------------------------------------------------
485 static int CeedHostReciprocal_Sycl(CeedScalar *h_array, CeedSize length) {
486   for (CeedSize i = 0; i < length; i++) {
487     if (std::fabs(h_array[i]) > CEED_EPSILON) h_array[i] = 1. / h_array[i];
488   }
489   return CEED_ERROR_SUCCESS;
490 }
491 
492 //------------------------------------------------------------------------------
493 // Take reciprocal of a vector on device
494 //------------------------------------------------------------------------------
495 static int CeedDeviceReciprocal_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length) {
496   // Order queue
497   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
498   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) {
499     if (std::fabs(d_array[i]) > CEED_EPSILON) d_array[i] = 1. / d_array[i];
500   });
501   return CEED_ERROR_SUCCESS;
502 }
503 
504 //------------------------------------------------------------------------------
505 // Take reciprocal of a vector
506 //------------------------------------------------------------------------------
507 static int CeedVectorReciprocal_Sycl(CeedVector vec) {
508   Ceed             ceed;
509   Ceed_Sycl       *data;
510   CeedSize         length;
511   CeedVector_Sycl *impl;
512 
513   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
514   CeedCallBackend(CeedVectorGetData(vec, &impl));
515   CeedCallBackend(CeedVectorGetLength(vec, &length));
516   CeedCallBackend(CeedGetData(ceed, &data));
517 
518   // Set value for synced device/host array
519   if (impl->d_array) CeedCallBackend(CeedDeviceReciprocal_Sycl(data->sycl_queue, impl->d_array, length));
520   if (impl->h_array) CeedCallBackend(CeedHostReciprocal_Sycl(impl->h_array, length));
521   return CEED_ERROR_SUCCESS;
522 }
523 
524 //------------------------------------------------------------------------------
525 // Compute x = alpha x on the host
526 //------------------------------------------------------------------------------
527 static int CeedHostScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
528   for (CeedSize i = 0; i < length; i++) x_array[i] *= alpha;
529   return CEED_ERROR_SUCCESS;
530 }
531 
532 //------------------------------------------------------------------------------
533 // Compute x = alpha x on device
534 //------------------------------------------------------------------------------
535 static int CeedDeviceScale_Sycl(sycl::queue &sycl_queue, CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
536   // Order queue
537   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
538   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) { x_array[i] *= alpha; });
539   return CEED_ERROR_SUCCESS;
540 }
541 
542 //------------------------------------------------------------------------------
543 // Compute x = alpha x
544 //------------------------------------------------------------------------------
545 static int CeedVectorScale_Sycl(CeedVector x, CeedScalar alpha) {
546   Ceed             ceed;
547   Ceed_Sycl       *data;
548   CeedSize         length;
549   CeedVector_Sycl *x_impl;
550 
551   CeedCallBackend(CeedVectorGetCeed(x, &ceed));
552   CeedCallBackend(CeedVectorGetData(x, &x_impl));
553   CeedCallBackend(CeedVectorGetLength(x, &length));
554   CeedCallBackend(CeedGetData(ceed, &data));
555 
556   // Set value for synced device/host array
557   if (x_impl->d_array) CeedCallBackend(CeedDeviceScale_Sycl(data->sycl_queue, x_impl->d_array, alpha, length));
558   if (x_impl->h_array) CeedCallBackend(CeedHostScale_Sycl(x_impl->h_array, alpha, length));
559   return CEED_ERROR_SUCCESS;
560 }
561 
562 //------------------------------------------------------------------------------
563 // Compute y = alpha x + y on the host
564 //------------------------------------------------------------------------------
565 static int CeedHostAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
566   for (CeedSize i = 0; i < length; i++) y_array[i] += alpha * x_array[i];
567   return CEED_ERROR_SUCCESS;
568 }
569 
570 //------------------------------------------------------------------------------
571 // Compute y = alpha x + y on device
572 //------------------------------------------------------------------------------
573 static int CeedDeviceAXPY_Sycl(sycl::queue &sycl_queue, CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
574   // Order queue
575   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
576   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) { y_array[i] += alpha * x_array[i]; });
577   return CEED_ERROR_SUCCESS;
578 }
579 
580 //------------------------------------------------------------------------------
581 // Compute y = alpha x + y
582 //------------------------------------------------------------------------------
583 static int CeedVectorAXPY_Sycl(CeedVector y, CeedScalar alpha, CeedVector x) {
584   Ceed             ceed;
585   Ceed_Sycl       *data;
586   CeedSize         length;
587   CeedVector_Sycl *y_impl, *x_impl;
588 
589   CeedCallBackend(CeedVectorGetCeed(y, &ceed));
590   CeedCallBackend(CeedVectorGetData(y, &y_impl));
591   CeedCallBackend(CeedVectorGetData(x, &x_impl));
592   CeedCallBackend(CeedVectorGetLength(y, &length));
593   CeedCallBackend(CeedGetData(ceed, &data));
594 
595   // Set value for synced device/host array
596   if (y_impl->d_array) {
597     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
598     CeedCallBackend(CeedDeviceAXPY_Sycl(data->sycl_queue, y_impl->d_array, alpha, x_impl->d_array, length));
599   }
600   if (y_impl->h_array) {
601     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
602     CeedCallBackend(CeedHostAXPY_Sycl(y_impl->h_array, alpha, x_impl->h_array, length));
603   }
604   return CEED_ERROR_SUCCESS;
605 }
606 
607 //------------------------------------------------------------------------------
608 // Compute the pointwise multiplication w = x .* y on the host
609 //------------------------------------------------------------------------------
610 static int CeedHostPointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
611   for (CeedSize i = 0; i < length; i++) w_array[i] = x_array[i] * y_array[i];
612   return CEED_ERROR_SUCCESS;
613 }
614 
615 //------------------------------------------------------------------------------
// Compute the pointwise multiplication w = x .* y on device
617 //------------------------------------------------------------------------------
618 static int CeedDevicePointwiseMult_Sycl(sycl::queue &sycl_queue, CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
619   // Order queue
620   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
621   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) { w_array[i] = x_array[i] * y_array[i]; });
622   return CEED_ERROR_SUCCESS;
623 }
624 
625 //------------------------------------------------------------------------------
626 // Compute the pointwise multiplication w = x .* y
627 //------------------------------------------------------------------------------
628 static int CeedVectorPointwiseMult_Sycl(CeedVector w, CeedVector x, CeedVector y) {
629   Ceed             ceed;
630   Ceed_Sycl       *data;
631   CeedSize         length;
632   CeedVector_Sycl *w_impl, *x_impl, *y_impl;
633 
634   CeedCallBackend(CeedVectorGetCeed(w, &ceed));
635   CeedCallBackend(CeedVectorGetData(w, &w_impl));
636   CeedCallBackend(CeedVectorGetData(x, &x_impl));
637   CeedCallBackend(CeedVectorGetData(y, &y_impl));
638   CeedCallBackend(CeedVectorGetLength(w, &length));
639   CeedCallBackend(CeedGetData(ceed, &data));
640 
641   // Set value for synced device/host array
642   if (!w_impl->d_array && !w_impl->h_array) {
643     CeedCallBackend(CeedVectorSetValue(w, 0.0));
644   }
645   if (w_impl->d_array) {
646     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
647     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_DEVICE));
648     CeedCallBackend(CeedDevicePointwiseMult_Sycl(data->sycl_queue, w_impl->d_array, x_impl->d_array, y_impl->d_array, length));
649   }
650   if (w_impl->h_array) {
651     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
652     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_HOST));
653     CeedCallBackend(CeedHostPointwiseMult_Sycl(w_impl->h_array, x_impl->h_array, y_impl->h_array, length));
654   }
655   return CEED_ERROR_SUCCESS;
656 }
657 
658 //------------------------------------------------------------------------------
659 // Destroy the vector
660 //------------------------------------------------------------------------------
661 static int CeedVectorDestroy_Sycl(const CeedVector vec) {
662   Ceed             ceed;
663   Ceed_Sycl       *data;
664   CeedVector_Sycl *impl;
665 
666   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
667   CeedCallBackend(CeedVectorGetData(vec, &impl));
668   CeedCallBackend(CeedGetData(ceed, &data));
669 
670   // Wait for all work to finish before freeing memory
671   CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
672   CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
673   CeedCallSycl(ceed, sycl::free(impl->reduction_norm, data->sycl_context));
674 
675   CeedCallBackend(CeedFree(&impl->h_array_owned));
676   CeedCallBackend(CeedFree(&impl));
677   return CEED_ERROR_SUCCESS;
678 }
679 
680 //------------------------------------------------------------------------------
681 // Create a vector of the specified length (does not allocate memory)
682 //------------------------------------------------------------------------------
683 int CeedVectorCreate_Sycl(CeedSize n, CeedVector vec) {
684   Ceed             ceed;
685   Ceed_Sycl       *data;
686   CeedVector_Sycl *impl;
687 
688   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
689   CeedCallBackend(CeedGetData(ceed, &data));
690   CeedCallBackend(CeedCalloc(1, &impl));
691   CeedCallSycl(ceed, impl->reduction_norm = sycl::malloc_host<CeedScalar>(1, data->sycl_context));
692   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasValidArray", CeedVectorHasValidArray_Sycl));
693   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasBorrowedArrayOfType", CeedVectorHasBorrowedArrayOfType_Sycl));
694   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Sycl));
695   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "TakeArray", CeedVectorTakeArray_Sycl));
696   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetValue", CeedVectorSetValue_Sycl));
697   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SyncArray", CeedVectorSyncArray_Sycl));
698   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArray", CeedVectorGetArray_Sycl));
699   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Sycl));
700   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayWrite", CeedVectorGetArrayWrite_Sycl));
701   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Norm", CeedVectorNorm_Sycl));
702   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Reciprocal", CeedVectorReciprocal_Sycl));
703   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "AXPY", CeedVectorAXPY_Sycl));
704   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Scale", CeedVectorScale_Sycl));
705   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "PointwiseMult", CeedVectorPointwiseMult_Sycl));
706   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Sycl));
707   CeedCallBackend(CeedVectorSetData(vec, impl));
708   return CEED_ERROR_SUCCESS;
709 }
710