xref: /libCEED/backends/sycl-ref/ceed-sycl-vector.sycl.cpp (revision f5d1e50421556545666f89e18ad21fef6dcea5ba)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/backend.h>
9 #include <ceed/ceed.h>
10 
11 #include <cmath>
12 #include <string>
13 #include <sycl/sycl.hpp>
14 
15 #include "ceed-sycl-ref.hpp"
16 
17 //------------------------------------------------------------------------------
18 // Check if host/device sync is needed
19 //------------------------------------------------------------------------------
20 static inline int CeedVectorNeedSync_Sycl(const CeedVector vec, CeedMemType mem_type, bool *need_sync) {
21   bool             has_valid_array = false;
22   CeedVector_Sycl *impl;
23 
24   CeedCallBackend(CeedVectorGetData(vec, &impl));
25   CeedCallBackend(CeedVectorHasValidArray(vec, &has_valid_array));
26   switch (mem_type) {
27     case CEED_MEM_HOST:
28       *need_sync = has_valid_array && !impl->h_array;
29       break;
30     case CEED_MEM_DEVICE:
31       *need_sync = has_valid_array && !impl->d_array;
32       break;
33   }
34   return CEED_ERROR_SUCCESS;
35 }
36 
37 //------------------------------------------------------------------------------
38 // Sync host to device
39 //------------------------------------------------------------------------------
40 static inline int CeedVectorSyncH2D_Sycl(const CeedVector vec) {
41   Ceed             ceed;
42   Ceed_Sycl       *data;
43   CeedSize         length;
44   CeedVector_Sycl *impl;
45 
46   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
47   CeedCallBackend(CeedVectorGetData(vec, &impl));
48   CeedCallBackend(CeedGetData(ceed, &data));
49   CeedCheck(impl->h_array, ceed, CEED_ERROR_BACKEND, "No valid host data to sync to device");
50 
51   CeedCallBackend(CeedVectorGetLength(vec, &length));
52   if (impl->d_array_borrowed) {
53     impl->d_array = impl->d_array_borrowed;
54   } else if (impl->d_array_owned) {
55     impl->d_array = impl->d_array_owned;
56   } else {
57     CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context));
58     impl->d_array = impl->d_array_owned;
59   }
60 
61   sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier();
62   // Copy from host to device
63   sycl::event copy_event = data->sycl_queue.copy<CeedScalar>(impl->h_array, impl->d_array, length, {e});
64   // Wait for copy to finish and handle exceptions.
65   CeedCallSycl(ceed, copy_event.wait_and_throw());
66   return CEED_ERROR_SUCCESS;
67 }
68 
69 //------------------------------------------------------------------------------
70 // Sync device to host
71 //------------------------------------------------------------------------------
72 static inline int CeedVectorSyncD2H_Sycl(const CeedVector vec) {
73   Ceed             ceed;
74   Ceed_Sycl       *data;
75   CeedSize         length;
76   CeedVector_Sycl *impl;
77 
78   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
79   CeedCallBackend(CeedVectorGetData(vec, &impl));
80   CeedCallBackend(CeedGetData(ceed, &data));
81 
82   CeedCheck(impl->d_array, ceed, CEED_ERROR_BACKEND, "No valid device data to sync to host");
83 
84   CeedCallBackend(CeedVectorGetLength(vec, &length));
85   if (impl->h_array_borrowed) {
86     impl->h_array = impl->h_array_borrowed;
87   } else if (impl->h_array_owned) {
88     impl->h_array = impl->h_array_owned;
89   } else {
90     CeedCallBackend(CeedCalloc(length, &impl->h_array_owned));
91     impl->h_array = impl->h_array_owned;
92   }
93 
94   // Order queue
95   sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier();
96   // Copy from device to host
97   sycl::event copy_event = data->sycl_queue.copy<CeedScalar>(impl->d_array, impl->h_array, length, {e});
98   // Wait for copy to finish and handle exceptions.
99   CeedCallSycl(ceed, copy_event.wait_and_throw());
100   return CEED_ERROR_SUCCESS;
101 }
102 
103 //------------------------------------------------------------------------------
104 // Sync arrays
105 //------------------------------------------------------------------------------
106 static int CeedVectorSyncArray_Sycl(const CeedVector vec, CeedMemType mem_type) {
107   bool need_sync = false;
108 
109   // Check whether device/host sync is needed
110   CeedCallBackend(CeedVectorNeedSync_Sycl(vec, mem_type, &need_sync));
111   if (!need_sync) return CEED_ERROR_SUCCESS;
112 
113   switch (mem_type) {
114     case CEED_MEM_HOST:
115       return CeedVectorSyncD2H_Sycl(vec);
116     case CEED_MEM_DEVICE:
117       return CeedVectorSyncH2D_Sycl(vec);
118   }
119   return CEED_ERROR_UNSUPPORTED;
120 }
121 
122 //------------------------------------------------------------------------------
123 // Set all pointers as invalid
124 //------------------------------------------------------------------------------
125 static inline int CeedVectorSetAllInvalid_Sycl(const CeedVector vec) {
126   CeedVector_Sycl *impl;
127 
128   CeedCallBackend(CeedVectorGetData(vec, &impl));
129   impl->h_array = NULL;
130   impl->d_array = NULL;
131   return CEED_ERROR_SUCCESS;
132 }
133 
134 //------------------------------------------------------------------------------
135 // Check if CeedVector has any valid pointer
136 //------------------------------------------------------------------------------
137 static inline int CeedVectorHasValidArray_Sycl(const CeedVector vec, bool *has_valid_array) {
138   CeedVector_Sycl *impl;
139 
140   CeedCallBackend(CeedVectorGetData(vec, &impl));
141   *has_valid_array = impl->h_array || impl->d_array;
142   return CEED_ERROR_SUCCESS;
143 }
144 
145 //------------------------------------------------------------------------------
146 // Check if has array of given type
147 //------------------------------------------------------------------------------
148 static inline int CeedVectorHasArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_array_of_type) {
149   CeedVector_Sycl *impl;
150 
151   CeedCallBackend(CeedVectorGetData(vec, &impl));
152   switch (mem_type) {
153     case CEED_MEM_HOST:
154       *has_array_of_type = impl->h_array_borrowed || impl->h_array_owned;
155       break;
156     case CEED_MEM_DEVICE:
157       *has_array_of_type = impl->d_array_borrowed || impl->d_array_owned;
158       break;
159   }
160   return CEED_ERROR_SUCCESS;
161 }
162 
163 //------------------------------------------------------------------------------
164 // Check if has borrowed array of given type
165 //------------------------------------------------------------------------------
166 static inline int CeedVectorHasBorrowedArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_borrowed_array_of_type) {
167   CeedVector_Sycl *impl;
168 
169   CeedCallBackend(CeedVectorGetData(vec, &impl));
170   switch (mem_type) {
171     case CEED_MEM_HOST:
172       *has_borrowed_array_of_type = impl->h_array_borrowed;
173       break;
174     case CEED_MEM_DEVICE:
175       *has_borrowed_array_of_type = impl->d_array_borrowed;
176       break;
177   }
178   return CEED_ERROR_SUCCESS;
179 }
180 
181 //------------------------------------------------------------------------------
182 // Set array from host
183 //------------------------------------------------------------------------------
184 static int CeedVectorSetArrayHost_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
185   CeedSize         length;
186   CeedVector_Sycl *impl;
187 
188   CeedCallBackend(CeedVectorGetData(vec, &impl));
189   CeedCallBackend(CeedVectorGetLength(vec, &length));
190 
191   CeedCallBackend(CeedSetHostCeedScalarArray(array, copy_mode, length, (const CeedScalar **)&impl->h_array_owned,
192                                              (const CeedScalar **)&impl->h_array_borrowed, (const CeedScalar **)&impl->h_array));
193   return CEED_ERROR_SUCCESS;
194 }
195 
196 //------------------------------------------------------------------------------
197 // Set array from device
198 //------------------------------------------------------------------------------
199 static int CeedVectorSetArrayDevice_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
200   CeedSize         length;
201   Ceed             ceed;
202   Ceed_Sycl       *data;
203   CeedVector_Sycl *impl;
204 
205   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
206   CeedCallBackend(CeedVectorGetData(vec, &impl));
207   CeedCallBackend(CeedGetData(ceed, &data));
208   CeedCallBackend(CeedVectorGetLength(vec, &length));
209 
210   // Order queue
211   sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier();
212 
213   switch (copy_mode) {
214     case CEED_COPY_VALUES: {
215       if (!impl->d_array_owned)
216         CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context));
217       if (array) {
218         sycl::event copy_event = data->sycl_queue.copy<CeedScalar>(array, impl->d_array, length, {e});
219         // Wait for copy to finish and handle exceptions.
220         CeedCallSycl(ceed, copy_event.wait_and_throw());
221       }
222       impl->d_array_borrowed = NULL;
223       impl->d_array          = impl->d_array_owned;
224     } break;
225     case CEED_OWN_POINTER:
226       if (impl->d_array_owned) {
227         // Wait for all work to finish before freeing memory
228         CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
229         CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
230       }
231       impl->d_array_owned    = array;
232       impl->d_array_borrowed = NULL;
233       impl->d_array          = impl->d_array_owned;
234       break;
235     case CEED_USE_POINTER:
236       if (impl->d_array_owned) {
237         // Wait for all work to finish before freeing memory
238         CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
239         CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
240       }
241       impl->d_array_owned    = NULL;
242       impl->d_array_borrowed = array;
243       impl->d_array          = impl->d_array_borrowed;
244       break;
245   }
246   return CEED_ERROR_SUCCESS;
247 }
248 
249 //------------------------------------------------------------------------------
250 // Set the array used by a vector,
251 //   freeing any previously allocated array if applicable
252 //------------------------------------------------------------------------------
253 static int CeedVectorSetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedCopyMode copy_mode, CeedScalar *array) {
254   CeedVector_Sycl *impl;
255 
256   CeedCallBackend(CeedVectorGetData(vec, &impl));
257 
258   CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec));
259   switch (mem_type) {
260     case CEED_MEM_HOST:
261       return CeedVectorSetArrayHost_Sycl(vec, copy_mode, array);
262     case CEED_MEM_DEVICE:
263       return CeedVectorSetArrayDevice_Sycl(vec, copy_mode, array);
264   }
265   return CEED_ERROR_UNSUPPORTED;
266 }
267 
268 //------------------------------------------------------------------------------
269 // Set host array to value
270 //------------------------------------------------------------------------------
271 static int CeedHostSetValue_Sycl(CeedScalar *h_array, CeedSize length, CeedScalar val) {
272   for (CeedSize i = 0; i < length; i++) h_array[i] = val;
273   return CEED_ERROR_SUCCESS;
274 }
275 
276 //------------------------------------------------------------------------------
277 // Set device array to value
278 //------------------------------------------------------------------------------
279 static int CeedDeviceSetValue_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length, CeedScalar val) {
280   // Order queue
281   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
282   sycl_queue.fill(d_array, val, length, {e});
283   return CEED_ERROR_SUCCESS;
284 }
285 
286 //------------------------------------------------------------------------------
287 // Set a vector to a value,
288 //------------------------------------------------------------------------------
289 static int CeedVectorSetValue_Sycl(CeedVector vec, CeedScalar val) {
290   Ceed             ceed;
291   Ceed_Sycl       *data;
292   CeedSize         length;
293   CeedVector_Sycl *impl;
294 
295   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
296   CeedCallBackend(CeedVectorGetData(vec, &impl));
297   CeedCallBackend(CeedVectorGetLength(vec, &length));
298   CeedCallBackend(CeedGetData(ceed, &data));
299 
300   // Set value for synced device/host array
301   if (!impl->d_array && !impl->h_array) {
302     if (impl->d_array_borrowed) {
303       impl->d_array = impl->d_array_borrowed;
304     } else if (impl->h_array_borrowed) {
305       impl->h_array = impl->h_array_borrowed;
306     } else if (impl->d_array_owned) {
307       impl->d_array = impl->d_array_owned;
308     } else if (impl->h_array_owned) {
309       impl->h_array = impl->h_array_owned;
310     } else {
311       CeedCallBackend(CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL));
312     }
313   }
314   if (impl->d_array) {
315     CeedCallBackend(CeedDeviceSetValue_Sycl(data->sycl_queue, impl->d_array, length, val));
316     impl->h_array = NULL;
317   }
318   if (impl->h_array) {
319     CeedCallBackend(CeedHostSetValue_Sycl(impl->h_array, length, val));
320     impl->d_array = NULL;
321   }
322   return CEED_ERROR_SUCCESS;
323 }
324 
325 //------------------------------------------------------------------------------
326 // Vector Take Array
327 //------------------------------------------------------------------------------
328 static int CeedVectorTakeArray_Sycl(CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
329   Ceed             ceed;
330   Ceed_Sycl       *data;
331   CeedVector_Sycl *impl;
332 
333   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
334   CeedCallBackend(CeedVectorGetData(vec, &impl));
335   CeedCallBackend(CeedGetData(ceed, &data));
336 
337   // Order queue
338   data->sycl_queue.ext_oneapi_submit_barrier();
339 
340   // Sync array to requested mem_type
341   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
342 
343   // Update pointer
344   switch (mem_type) {
345     case CEED_MEM_HOST:
346       (*array)               = impl->h_array_borrowed;
347       impl->h_array_borrowed = NULL;
348       impl->h_array          = NULL;
349       break;
350     case CEED_MEM_DEVICE:
351       (*array)               = impl->d_array_borrowed;
352       impl->d_array_borrowed = NULL;
353       impl->d_array          = NULL;
354       break;
355   }
356   return CEED_ERROR_SUCCESS;
357 }
358 
359 //------------------------------------------------------------------------------
360 // Core logic for array syncronization for GetArray.
361 //   If a different memory type is most up to date, this will perform a copy
362 //------------------------------------------------------------------------------
363 static int CeedVectorGetArrayCore_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
364   CeedVector_Sycl *impl;
365 
366   CeedCallBackend(CeedVectorGetData(vec, &impl));
367 
368   // Sync array to requested mem_type
369   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
370 
371   // Update pointer
372   switch (mem_type) {
373     case CEED_MEM_HOST:
374       *array = impl->h_array;
375       break;
376     case CEED_MEM_DEVICE:
377       *array = impl->d_array;
378       break;
379   }
380   return CEED_ERROR_SUCCESS;
381 }
382 
383 //------------------------------------------------------------------------------
384 // Get read-only access to a vector via the specified mem_type
385 //------------------------------------------------------------------------------
386 static int CeedVectorGetArrayRead_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedScalar **array) {
387   return CeedVectorGetArrayCore_Sycl(vec, mem_type, (CeedScalar **)array);
388 }
389 
390 //------------------------------------------------------------------------------
391 // Get read/write access to a vector via the specified mem_type
392 //------------------------------------------------------------------------------
393 static int CeedVectorGetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
394   CeedVector_Sycl *impl;
395 
396   CeedCallBackend(CeedVectorGetData(vec, &impl));
397   CeedCallBackend(CeedVectorGetArrayCore_Sycl(vec, mem_type, array));
398   CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec));
399   switch (mem_type) {
400     case CEED_MEM_HOST:
401       impl->h_array = *array;
402       break;
403     case CEED_MEM_DEVICE:
404       impl->d_array = *array;
405       break;
406   }
407   return CEED_ERROR_SUCCESS;
408 }
409 
410 //------------------------------------------------------------------------------
411 // Get write access to a vector via the specified mem_type
412 //------------------------------------------------------------------------------
413 static int CeedVectorGetArrayWrite_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
414   bool             has_array_of_type = true;
415   CeedVector_Sycl *impl;
416 
417   CeedCallBackend(CeedVectorGetData(vec, &impl));
418   CeedCallBackend(CeedVectorHasArrayOfType_Sycl(vec, mem_type, &has_array_of_type));
419   if (!has_array_of_type) {
420     // Allocate if array is not yet allocated
421     CeedCallBackend(CeedVectorSetArray(vec, mem_type, CEED_COPY_VALUES, NULL));
422   } else {
423     // Select dirty array
424     switch (mem_type) {
425       case CEED_MEM_HOST:
426         if (impl->h_array_borrowed) impl->h_array = impl->h_array_borrowed;
427         else impl->h_array = impl->h_array_owned;
428         break;
429       case CEED_MEM_DEVICE:
430         if (impl->d_array_borrowed) impl->d_array = impl->d_array_borrowed;
431         else impl->d_array = impl->d_array_owned;
432     }
433   }
434   return CeedVectorGetArray_Sycl(vec, mem_type, array);
435 }
436 
437 //------------------------------------------------------------------------------
438 // Get the norm of a CeedVector
439 //------------------------------------------------------------------------------
440 static int CeedVectorNorm_Sycl(CeedVector vec, CeedNormType type, CeedScalar *norm) {
441   Ceed              ceed;
442   Ceed_Sycl        *data;
443   CeedSize          length;
444   const CeedScalar *d_array;
445   CeedVector_Sycl  *impl;
446 
447   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
448   CeedCallBackend(CeedVectorGetData(vec, &impl));
449   CeedCallBackend(CeedVectorGetLength(vec, &length));
450   CeedCallBackend(CeedGetData(ceed, &data));
451 
452   // Compute norm
453   CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &d_array));
454   switch (type) {
455     case CEED_NORM_1: {
456       // Order queue
457       sycl::event e            = data->sycl_queue.ext_oneapi_submit_barrier();
458       auto        sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}});
459       data->sycl_queue.parallel_for(length, {e}, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += abs(d_array[i]); }).wait_and_throw();
460     } break;
461     case CEED_NORM_2: {
462       // Order queue
463       sycl::event e            = data->sycl_queue.ext_oneapi_submit_barrier();
464       auto        sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}});
465       data->sycl_queue.parallel_for(length, {e}, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += (d_array[i] * d_array[i]); }).wait_and_throw();
466     } break;
467     case CEED_NORM_MAX: {
468       // Order queue
469       sycl::event e            = data->sycl_queue.ext_oneapi_submit_barrier();
470       auto        maxReduction = sycl::reduction(impl->reduction_norm, sycl::maximum<>(), {sycl::property::reduction::initialize_to_identity{}});
471       data->sycl_queue.parallel_for(length, {e}, maxReduction, [=](sycl::id<1> i, auto &max) { max.combine(abs(d_array[i])); }).wait_and_throw();
472     } break;
473   }
474   // L2 norm - square root over reduced value
475   if (type == CEED_NORM_2) *norm = sqrt(*impl->reduction_norm);
476   else *norm = *impl->reduction_norm;
477   CeedCallBackend(CeedVectorRestoreArrayRead(vec, &d_array));
478   return CEED_ERROR_SUCCESS;
479 }
480 
481 //------------------------------------------------------------------------------
482 // Take reciprocal of a vector on host
483 //------------------------------------------------------------------------------
484 static int CeedHostReciprocal_Sycl(CeedScalar *h_array, CeedSize length) {
485   for (CeedSize i = 0; i < length; i++) {
486     if (std::fabs(h_array[i]) > CEED_EPSILON) h_array[i] = 1. / h_array[i];
487   }
488   return CEED_ERROR_SUCCESS;
489 }
490 
491 //------------------------------------------------------------------------------
492 // Take reciprocal of a vector on device
493 //------------------------------------------------------------------------------
494 static int CeedDeviceReciprocal_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length) {
495   // Order queue
496   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
497   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) {
498     if (std::fabs(d_array[i]) > CEED_EPSILON) d_array[i] = 1. / d_array[i];
499   });
500   return CEED_ERROR_SUCCESS;
501 }
502 
503 //------------------------------------------------------------------------------
504 // Take reciprocal of a vector
505 //------------------------------------------------------------------------------
506 static int CeedVectorReciprocal_Sycl(CeedVector vec) {
507   Ceed             ceed;
508   Ceed_Sycl       *data;
509   CeedSize         length;
510   CeedVector_Sycl *impl;
511 
512   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
513   CeedCallBackend(CeedVectorGetData(vec, &impl));
514   CeedCallBackend(CeedVectorGetLength(vec, &length));
515   CeedCallBackend(CeedGetData(ceed, &data));
516 
517   // Set value for synced device/host array
518   if (impl->d_array) CeedCallBackend(CeedDeviceReciprocal_Sycl(data->sycl_queue, impl->d_array, length));
519   if (impl->h_array) CeedCallBackend(CeedHostReciprocal_Sycl(impl->h_array, length));
520   return CEED_ERROR_SUCCESS;
521 }
522 
523 //------------------------------------------------------------------------------
524 // Compute x = alpha x on the host
525 //------------------------------------------------------------------------------
526 static int CeedHostScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
527   for (CeedSize i = 0; i < length; i++) x_array[i] *= alpha;
528   return CEED_ERROR_SUCCESS;
529 }
530 
531 //------------------------------------------------------------------------------
532 // Compute x = alpha x on device
533 //------------------------------------------------------------------------------
534 static int CeedDeviceScale_Sycl(sycl::queue &sycl_queue, CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
535   // Order queue
536   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
537   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) { x_array[i] *= alpha; });
538   return CEED_ERROR_SUCCESS;
539 }
540 
541 //------------------------------------------------------------------------------
542 // Compute x = alpha x
543 //------------------------------------------------------------------------------
544 static int CeedVectorScale_Sycl(CeedVector x, CeedScalar alpha) {
545   Ceed             ceed;
546   Ceed_Sycl       *data;
547   CeedSize         length;
548   CeedVector_Sycl *x_impl;
549 
550   CeedCallBackend(CeedVectorGetCeed(x, &ceed));
551   CeedCallBackend(CeedVectorGetData(x, &x_impl));
552   CeedCallBackend(CeedVectorGetLength(x, &length));
553   CeedCallBackend(CeedGetData(ceed, &data));
554 
555   // Set value for synced device/host array
556   if (x_impl->d_array) CeedCallBackend(CeedDeviceScale_Sycl(data->sycl_queue, x_impl->d_array, alpha, length));
557   if (x_impl->h_array) CeedCallBackend(CeedHostScale_Sycl(x_impl->h_array, alpha, length));
558   return CEED_ERROR_SUCCESS;
559 }
560 
561 //------------------------------------------------------------------------------
562 // Compute y = alpha x + y on the host
563 //------------------------------------------------------------------------------
564 static int CeedHostAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
565   for (CeedSize i = 0; i < length; i++) y_array[i] += alpha * x_array[i];
566   return CEED_ERROR_SUCCESS;
567 }
568 
569 //------------------------------------------------------------------------------
570 // Compute y = alpha x + y on device
571 //------------------------------------------------------------------------------
572 static int CeedDeviceAXPY_Sycl(sycl::queue &sycl_queue, CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
573   // Order queue
574   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
575   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) { y_array[i] += alpha * x_array[i]; });
576   return CEED_ERROR_SUCCESS;
577 }
578 
579 //------------------------------------------------------------------------------
580 // Compute y = alpha x + y
581 //------------------------------------------------------------------------------
582 static int CeedVectorAXPY_Sycl(CeedVector y, CeedScalar alpha, CeedVector x) {
583   Ceed             ceed;
584   Ceed_Sycl       *data;
585   CeedSize         length;
586   CeedVector_Sycl *y_impl, *x_impl;
587 
588   CeedCallBackend(CeedVectorGetCeed(y, &ceed));
589   CeedCallBackend(CeedVectorGetData(y, &y_impl));
590   CeedCallBackend(CeedVectorGetData(x, &x_impl));
591   CeedCallBackend(CeedVectorGetLength(y, &length));
592   CeedCallBackend(CeedGetData(ceed, &data));
593 
594   // Set value for synced device/host array
595   if (y_impl->d_array) {
596     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
597     CeedCallBackend(CeedDeviceAXPY_Sycl(data->sycl_queue, y_impl->d_array, alpha, x_impl->d_array, length));
598   }
599   if (y_impl->h_array) {
600     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
601     CeedCallBackend(CeedHostAXPY_Sycl(y_impl->h_array, alpha, x_impl->h_array, length));
602   }
603   return CEED_ERROR_SUCCESS;
604 }
605 
606 //------------------------------------------------------------------------------
607 // Compute the pointwise multiplication w = x .* y on the host
608 //------------------------------------------------------------------------------
609 static int CeedHostPointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
610   for (CeedSize i = 0; i < length; i++) w_array[i] = x_array[i] * y_array[i];
611   return CEED_ERROR_SUCCESS;
612 }
613 
614 //------------------------------------------------------------------------------
615 // Compute the pointwise multiplication w = x .* y on device (impl in .cu file)
616 //------------------------------------------------------------------------------
617 static int CeedDevicePointwiseMult_Sycl(sycl::queue &sycl_queue, CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
618   // Order queue
619   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
620   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) { w_array[i] = x_array[i] * y_array[i]; });
621   return CEED_ERROR_SUCCESS;
622 }
623 
624 //------------------------------------------------------------------------------
625 // Compute the pointwise multiplication w = x .* y
626 //------------------------------------------------------------------------------
627 static int CeedVectorPointwiseMult_Sycl(CeedVector w, CeedVector x, CeedVector y) {
628   Ceed             ceed;
629   Ceed_Sycl       *data;
630   CeedSize         length;
631   CeedVector_Sycl *w_impl, *x_impl, *y_impl;
632 
633   CeedCallBackend(CeedVectorGetCeed(w, &ceed));
634   CeedCallBackend(CeedVectorGetData(w, &w_impl));
635   CeedCallBackend(CeedVectorGetData(x, &x_impl));
636   CeedCallBackend(CeedVectorGetData(y, &y_impl));
637   CeedCallBackend(CeedVectorGetLength(w, &length));
638   CeedCallBackend(CeedGetData(ceed, &data));
639 
640   // Set value for synced device/host array
641   if (!w_impl->d_array && !w_impl->h_array) {
642     CeedCallBackend(CeedVectorSetValue(w, 0.0));
643   }
644   if (w_impl->d_array) {
645     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
646     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_DEVICE));
647     CeedCallBackend(CeedDevicePointwiseMult_Sycl(data->sycl_queue, w_impl->d_array, x_impl->d_array, y_impl->d_array, length));
648   }
649   if (w_impl->h_array) {
650     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
651     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_HOST));
652     CeedCallBackend(CeedHostPointwiseMult_Sycl(w_impl->h_array, x_impl->h_array, y_impl->h_array, length));
653   }
654   return CEED_ERROR_SUCCESS;
655 }
656 
657 //------------------------------------------------------------------------------
658 // Destroy the vector
659 //------------------------------------------------------------------------------
660 static int CeedVectorDestroy_Sycl(const CeedVector vec) {
661   Ceed             ceed;
662   Ceed_Sycl       *data;
663   CeedVector_Sycl *impl;
664 
665   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
666   CeedCallBackend(CeedVectorGetData(vec, &impl));
667   CeedCallBackend(CeedGetData(ceed, &data));
668 
669   // Wait for all work to finish before freeing memory
670   CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
671   CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
672   CeedCallSycl(ceed, sycl::free(impl->reduction_norm, data->sycl_context));
673 
674   CeedCallBackend(CeedFree(&impl->h_array_owned));
675   CeedCallBackend(CeedFree(&impl));
676   return CEED_ERROR_SUCCESS;
677 }
678 
679 //------------------------------------------------------------------------------
680 // Create a vector of the specified length (does not allocate memory)
681 //------------------------------------------------------------------------------
682 int CeedVectorCreate_Sycl(CeedSize n, CeedVector vec) {
683   Ceed             ceed;
684   Ceed_Sycl       *data;
685   CeedVector_Sycl *impl;
686 
687   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
688   CeedCallBackend(CeedGetData(ceed, &data));
689   CeedCallBackend(CeedCalloc(1, &impl));
690   CeedCallSycl(ceed, impl->reduction_norm = sycl::malloc_host<CeedScalar>(1, data->sycl_context));
691   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasValidArray", CeedVectorHasValidArray_Sycl));
692   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasBorrowedArrayOfType", CeedVectorHasBorrowedArrayOfType_Sycl));
693   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Sycl));
694   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "TakeArray", CeedVectorTakeArray_Sycl));
695   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetValue", CeedVectorSetValue_Sycl));
696   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SyncArray", CeedVectorSyncArray_Sycl));
697   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArray", CeedVectorGetArray_Sycl));
698   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Sycl));
699   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayWrite", CeedVectorGetArrayWrite_Sycl));
700   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Norm", CeedVectorNorm_Sycl));
701   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Reciprocal", CeedVectorReciprocal_Sycl));
702   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "AXPY", CeedVectorAXPY_Sycl));
703   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Scale", CeedVectorScale_Sycl));
704   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "PointwiseMult", CeedVectorPointwiseMult_Sycl));
705   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Sycl));
706   CeedCallBackend(CeedVectorSetData(vec, impl));
707   return CEED_ERROR_SUCCESS;
708 }
709