xref: /libCEED/backends/sycl-ref/ceed-sycl-vector.sycl.cpp (revision 73441d2798f40b051fe979327dbc45bfa6942f05)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/backend.h>
9 #include <ceed/ceed.h>
10 
11 #include <cmath>
12 #include <string>
13 #include <sycl/sycl.hpp>
14 
15 #include "ceed-sycl-ref.hpp"
16 
17 //------------------------------------------------------------------------------
18 // Check if host/device sync is needed
19 //------------------------------------------------------------------------------
20 static inline int CeedVectorNeedSync_Sycl(const CeedVector vec, CeedMemType mem_type, bool *need_sync) {
21   bool             has_valid_array = false;
22   CeedVector_Sycl *impl;
23 
24   CeedCallBackend(CeedVectorGetData(vec, &impl));
25   CeedCallBackend(CeedVectorHasValidArray(vec, &has_valid_array));
26   switch (mem_type) {
27     case CEED_MEM_HOST:
28       *need_sync = has_valid_array && !impl->h_array;
29       break;
30     case CEED_MEM_DEVICE:
31       *need_sync = has_valid_array && !impl->d_array;
32       break;
33   }
34   return CEED_ERROR_SUCCESS;
35 }
36 
37 //------------------------------------------------------------------------------
38 // Sync host to device
39 //------------------------------------------------------------------------------
40 static inline int CeedVectorSyncH2D_Sycl(const CeedVector vec) {
41   Ceed             ceed;
42   Ceed_Sycl       *data;
43   CeedSize         length;
44   CeedVector_Sycl *impl;
45 
46   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
47   CeedCallBackend(CeedVectorGetData(vec, &impl));
48   CeedCallBackend(CeedGetData(ceed, &data));
49   CeedCheck(impl->h_array, ceed, CEED_ERROR_BACKEND, "No valid host data to sync to device");
50 
51   CeedCallBackend(CeedVectorGetLength(vec, &length));
52   if (impl->d_array_borrowed) {
53     impl->d_array = impl->d_array_borrowed;
54   } else if (impl->d_array_owned) {
55     impl->d_array = impl->d_array_owned;
56   } else {
57     CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context));
58     impl->d_array = impl->d_array_owned;
59   }
60 
61   sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier();
62   // Copy from host to device
63   sycl::event copy_event = data->sycl_queue.copy<CeedScalar>(impl->h_array, impl->d_array, length, {e});
64   // Wait for copy to finish and handle exceptions.
65   CeedCallSycl(ceed, copy_event.wait_and_throw());
66   return CEED_ERROR_SUCCESS;
67 }
68 
69 //------------------------------------------------------------------------------
70 // Sync device to host
71 //------------------------------------------------------------------------------
72 static inline int CeedVectorSyncD2H_Sycl(const CeedVector vec) {
73   Ceed             ceed;
74   Ceed_Sycl       *data;
75   CeedSize         length;
76   CeedVector_Sycl *impl;
77 
78   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
79   CeedCallBackend(CeedVectorGetData(vec, &impl));
80   CeedCallBackend(CeedGetData(ceed, &data));
81 
82   CeedCheck(impl->d_array, ceed, CEED_ERROR_BACKEND, "No valid device data to sync to host");
83 
84   CeedCallBackend(CeedVectorGetLength(vec, &length));
85   if (impl->h_array_borrowed) {
86     impl->h_array = impl->h_array_borrowed;
87   } else if (impl->h_array_owned) {
88     impl->h_array = impl->h_array_owned;
89   } else {
90     CeedCallBackend(CeedCalloc(length, &impl->h_array_owned));
91     impl->h_array = impl->h_array_owned;
92   }
93 
94   // Order queue
95   sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier();
96   // Copy from device to host
97   sycl::event copy_event = data->sycl_queue.copy<CeedScalar>(impl->d_array, impl->h_array, length, {e});
98   // Wait for copy to finish and handle exceptions.
99   CeedCallSycl(ceed, copy_event.wait_and_throw());
100   return CEED_ERROR_SUCCESS;
101 }
102 
103 //------------------------------------------------------------------------------
104 // Sync arrays
105 //------------------------------------------------------------------------------
106 static int CeedVectorSyncArray_Sycl(const CeedVector vec, CeedMemType mem_type) {
107   bool need_sync = false;
108 
109   // Check whether device/host sync is needed
110   CeedCallBackend(CeedVectorNeedSync_Sycl(vec, mem_type, &need_sync));
111   if (!need_sync) return CEED_ERROR_SUCCESS;
112 
113   switch (mem_type) {
114     case CEED_MEM_HOST:
115       return CeedVectorSyncD2H_Sycl(vec);
116     case CEED_MEM_DEVICE:
117       return CeedVectorSyncH2D_Sycl(vec);
118   }
119   return CEED_ERROR_UNSUPPORTED;
120 }
121 
122 //------------------------------------------------------------------------------
123 // Set all pointers as invalid
124 //------------------------------------------------------------------------------
125 static inline int CeedVectorSetAllInvalid_Sycl(const CeedVector vec) {
126   CeedVector_Sycl *impl;
127 
128   CeedCallBackend(CeedVectorGetData(vec, &impl));
129   impl->h_array = NULL;
130   impl->d_array = NULL;
131   return CEED_ERROR_SUCCESS;
132 }
133 
134 //------------------------------------------------------------------------------
135 // Check if CeedVector has any valid pointer
136 //------------------------------------------------------------------------------
137 static inline int CeedVectorHasValidArray_Sycl(const CeedVector vec, bool *has_valid_array) {
138   CeedVector_Sycl *impl;
139 
140   CeedCallBackend(CeedVectorGetData(vec, &impl));
141   *has_valid_array = impl->h_array || impl->d_array;
142   return CEED_ERROR_SUCCESS;
143 }
144 
145 //------------------------------------------------------------------------------
146 // Check if has array of given type
147 //------------------------------------------------------------------------------
148 static inline int CeedVectorHasArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_array_of_type) {
149   CeedVector_Sycl *impl;
150 
151   CeedCallBackend(CeedVectorGetData(vec, &impl));
152   switch (mem_type) {
153     case CEED_MEM_HOST:
154       *has_array_of_type = impl->h_array_borrowed || impl->h_array_owned;
155       break;
156     case CEED_MEM_DEVICE:
157       *has_array_of_type = impl->d_array_borrowed || impl->d_array_owned;
158       break;
159   }
160   return CEED_ERROR_SUCCESS;
161 }
162 
163 //------------------------------------------------------------------------------
164 // Check if has borrowed array of given type
165 //------------------------------------------------------------------------------
166 static inline int CeedVectorHasBorrowedArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_borrowed_array_of_type) {
167   CeedVector_Sycl *impl;
168 
169   CeedCallBackend(CeedVectorGetData(vec, &impl));
170   switch (mem_type) {
171     case CEED_MEM_HOST:
172       *has_borrowed_array_of_type = impl->h_array_borrowed;
173       break;
174     case CEED_MEM_DEVICE:
175       *has_borrowed_array_of_type = impl->d_array_borrowed;
176       break;
177   }
178   return CEED_ERROR_SUCCESS;
179 }
180 
181 //------------------------------------------------------------------------------
182 // Set array from host
183 //------------------------------------------------------------------------------
184 static int CeedVectorSetArrayHost_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
185   CeedVector_Sycl *impl;
186 
187   CeedCallBackend(CeedVectorGetData(vec, &impl));
188   switch (copy_mode) {
189     case CEED_COPY_VALUES: {
190       if (!impl->h_array_owned) {
191         CeedSize length;
192 
193         CeedCallBackend(CeedVectorGetLength(vec, &length));
194         CeedCallBackend(CeedMalloc(length, &impl->h_array_owned));
195       }
196       impl->h_array_borrowed = NULL;
197       impl->h_array          = impl->h_array_owned;
198       if (array) {
199         CeedSize length;
200         size_t   bytes;
201 
202         CeedCallBackend(CeedVectorGetLength(vec, &length));
203         bytes = length * sizeof(CeedScalar);
204         memcpy(impl->h_array, array, bytes);
205       }
206     } break;
207     case CEED_OWN_POINTER:
208       CeedCallBackend(CeedFree(&impl->h_array_owned));
209       impl->h_array_owned    = array;
210       impl->h_array_borrowed = NULL;
211       impl->h_array          = array;
212       break;
213     case CEED_USE_POINTER:
214       CeedCallBackend(CeedFree(&impl->h_array_owned));
215       impl->h_array_borrowed = array;
216       impl->h_array          = array;
217       break;
218   }
219   return CEED_ERROR_SUCCESS;
220 }
221 
222 //------------------------------------------------------------------------------
223 // Set array from device
224 //------------------------------------------------------------------------------
225 static int CeedVectorSetArrayDevice_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
226   Ceed             ceed;
227   Ceed_Sycl       *data;
228   CeedVector_Sycl *impl;
229 
230   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
231   CeedCallBackend(CeedVectorGetData(vec, &impl));
232   CeedCallBackend(CeedGetData(ceed, &data));
233 
234   // Order queue
235   sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier();
236 
237   switch (copy_mode) {
238     case CEED_COPY_VALUES: {
239       CeedSize length;
240 
241       CeedCallBackend(CeedVectorGetLength(vec, &length));
242       if (!impl->d_array_owned) {
243         CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context));
244       }
245       impl->d_array = impl->d_array_owned;
246       if (array) {
247         sycl::event copy_event = data->sycl_queue.copy<CeedScalar>(array, impl->d_array, length, {e});
248         // Wait for copy to finish and handle exceptions.
249         CeedCallSycl(ceed, copy_event.wait_and_throw());
250       }
251     } break;
252     case CEED_OWN_POINTER:
253       if (impl->d_array_owned) {
254         // Wait for all work to finish before freeing memory
255         CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
256         CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
257       }
258       impl->d_array_owned    = array;
259       impl->d_array_borrowed = NULL;
260       impl->d_array          = array;
261       break;
262     case CEED_USE_POINTER:
263       if (impl->d_array_owned) {
264         // Wait for all work to finish before freeing memory
265         CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
266         CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
267       }
268       impl->d_array_owned    = NULL;
269       impl->d_array_borrowed = array;
270       impl->d_array          = array;
271       break;
272   }
273   return CEED_ERROR_SUCCESS;
274 }
275 
276 //------------------------------------------------------------------------------
277 // Set the array used by a vector,
278 //   freeing any previously allocated array if applicable
279 //------------------------------------------------------------------------------
280 static int CeedVectorSetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedCopyMode copy_mode, CeedScalar *array) {
281   CeedVector_Sycl *impl;
282 
283   CeedCallBackend(CeedVectorGetData(vec, &impl));
284 
285   CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec));
286   switch (mem_type) {
287     case CEED_MEM_HOST:
288       return CeedVectorSetArrayHost_Sycl(vec, copy_mode, array);
289     case CEED_MEM_DEVICE:
290       return CeedVectorSetArrayDevice_Sycl(vec, copy_mode, array);
291   }
292   return CEED_ERROR_UNSUPPORTED;
293 }
294 
295 //------------------------------------------------------------------------------
296 // Set host array to value
297 //------------------------------------------------------------------------------
298 static int CeedHostSetValue_Sycl(CeedScalar *h_array, CeedSize length, CeedScalar val) {
299   for (CeedSize i = 0; i < length; i++) h_array[i] = val;
300   return CEED_ERROR_SUCCESS;
301 }
302 
303 //------------------------------------------------------------------------------
304 // Set device array to value
305 //------------------------------------------------------------------------------
306 static int CeedDeviceSetValue_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length, CeedScalar val) {
307   // Order queue
308   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
309   sycl_queue.fill(d_array, val, length, {e});
310   return CEED_ERROR_SUCCESS;
311 }
312 
313 //------------------------------------------------------------------------------
314 // Set a vector to a value,
315 //------------------------------------------------------------------------------
316 static int CeedVectorSetValue_Sycl(CeedVector vec, CeedScalar val) {
317   Ceed             ceed;
318   Ceed_Sycl       *data;
319   CeedSize         length;
320   CeedVector_Sycl *impl;
321 
322   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
323   CeedCallBackend(CeedVectorGetData(vec, &impl));
324   CeedCallBackend(CeedVectorGetLength(vec, &length));
325   CeedCallBackend(CeedGetData(ceed, &data));
326 
327   // Set value for synced device/host array
328   if (!impl->d_array && !impl->h_array) {
329     if (impl->d_array_borrowed) {
330       impl->d_array = impl->d_array_borrowed;
331     } else if (impl->h_array_borrowed) {
332       impl->h_array = impl->h_array_borrowed;
333     } else if (impl->d_array_owned) {
334       impl->d_array = impl->d_array_owned;
335     } else if (impl->h_array_owned) {
336       impl->h_array = impl->h_array_owned;
337     } else {
338       CeedCallBackend(CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL));
339     }
340   }
341   if (impl->d_array) {
342     CeedCallBackend(CeedDeviceSetValue_Sycl(data->sycl_queue, impl->d_array, length, val));
343     impl->h_array = NULL;
344   }
345   if (impl->h_array) {
346     CeedCallBackend(CeedHostSetValue_Sycl(impl->h_array, length, val));
347     impl->d_array = NULL;
348   }
349   return CEED_ERROR_SUCCESS;
350 }
351 
352 //------------------------------------------------------------------------------
353 // Vector Take Array
354 //------------------------------------------------------------------------------
355 static int CeedVectorTakeArray_Sycl(CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
356   Ceed             ceed;
357   Ceed_Sycl       *data;
358   CeedVector_Sycl *impl;
359 
360   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
361   CeedCallBackend(CeedVectorGetData(vec, &impl));
362   CeedCallBackend(CeedGetData(ceed, &data));
363 
364   // Order queue
365   data->sycl_queue.ext_oneapi_submit_barrier();
366 
367   // Sync array to requested mem_type
368   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
369 
370   // Update pointer
371   switch (mem_type) {
372     case CEED_MEM_HOST:
373       (*array)               = impl->h_array_borrowed;
374       impl->h_array_borrowed = NULL;
375       impl->h_array          = NULL;
376       break;
377     case CEED_MEM_DEVICE:
378       (*array)               = impl->d_array_borrowed;
379       impl->d_array_borrowed = NULL;
380       impl->d_array          = NULL;
381       break;
382   }
383   return CEED_ERROR_SUCCESS;
384 }
385 
386 //------------------------------------------------------------------------------
387 // Core logic for array syncronization for GetArray.
388 //   If a different memory type is most up to date, this will perform a copy
389 //------------------------------------------------------------------------------
390 static int CeedVectorGetArrayCore_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
391   CeedVector_Sycl *impl;
392 
393   CeedCallBackend(CeedVectorGetData(vec, &impl));
394 
395   // Sync array to requested mem_type
396   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
397 
398   // Update pointer
399   switch (mem_type) {
400     case CEED_MEM_HOST:
401       *array = impl->h_array;
402       break;
403     case CEED_MEM_DEVICE:
404       *array = impl->d_array;
405       break;
406   }
407   return CEED_ERROR_SUCCESS;
408 }
409 
410 //------------------------------------------------------------------------------
411 // Get read-only access to a vector via the specified mem_type
412 //------------------------------------------------------------------------------
413 static int CeedVectorGetArrayRead_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedScalar **array) {
414   return CeedVectorGetArrayCore_Sycl(vec, mem_type, (CeedScalar **)array);
415 }
416 
417 //------------------------------------------------------------------------------
418 // Get read/write access to a vector via the specified mem_type
419 //------------------------------------------------------------------------------
420 static int CeedVectorGetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
421   CeedVector_Sycl *impl;
422 
423   CeedCallBackend(CeedVectorGetData(vec, &impl));
424   CeedCallBackend(CeedVectorGetArrayCore_Sycl(vec, mem_type, array));
425   CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec));
426   switch (mem_type) {
427     case CEED_MEM_HOST:
428       impl->h_array = *array;
429       break;
430     case CEED_MEM_DEVICE:
431       impl->d_array = *array;
432       break;
433   }
434   return CEED_ERROR_SUCCESS;
435 }
436 
437 //------------------------------------------------------------------------------
438 // Get write access to a vector via the specified mem_type
439 //------------------------------------------------------------------------------
440 static int CeedVectorGetArrayWrite_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
441   bool             has_array_of_type = true;
442   CeedVector_Sycl *impl;
443 
444   CeedCallBackend(CeedVectorGetData(vec, &impl));
445   CeedCallBackend(CeedVectorHasArrayOfType_Sycl(vec, mem_type, &has_array_of_type));
446   if (!has_array_of_type) {
447     // Allocate if array is not yet allocated
448     CeedCallBackend(CeedVectorSetArray(vec, mem_type, CEED_COPY_VALUES, NULL));
449   } else {
450     // Select dirty array
451     switch (mem_type) {
452       case CEED_MEM_HOST:
453         if (impl->h_array_borrowed) impl->h_array = impl->h_array_borrowed;
454         else impl->h_array = impl->h_array_owned;
455         break;
456       case CEED_MEM_DEVICE:
457         if (impl->d_array_borrowed) impl->d_array = impl->d_array_borrowed;
458         else impl->d_array = impl->d_array_owned;
459     }
460   }
461   return CeedVectorGetArray_Sycl(vec, mem_type, array);
462 }
463 
464 //------------------------------------------------------------------------------
465 // Get the norm of a CeedVector
466 //------------------------------------------------------------------------------
467 static int CeedVectorNorm_Sycl(CeedVector vec, CeedNormType type, CeedScalar *norm) {
468   Ceed              ceed;
469   Ceed_Sycl        *data;
470   CeedSize          length;
471   const CeedScalar *d_array;
472   CeedVector_Sycl  *impl;
473 
474   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
475   CeedCallBackend(CeedVectorGetData(vec, &impl));
476   CeedCallBackend(CeedVectorGetLength(vec, &length));
477   CeedCallBackend(CeedGetData(ceed, &data));
478 
479   // Compute norm
480   CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &d_array));
481   switch (type) {
482     case CEED_NORM_1: {
483       // Order queue
484       sycl::event e            = data->sycl_queue.ext_oneapi_submit_barrier();
485       auto        sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}});
486       data->sycl_queue.parallel_for(length, {e}, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += abs(d_array[i]); }).wait_and_throw();
487     } break;
488     case CEED_NORM_2: {
489       // Order queue
490       sycl::event e            = data->sycl_queue.ext_oneapi_submit_barrier();
491       auto        sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}});
492       data->sycl_queue.parallel_for(length, {e}, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += (d_array[i] * d_array[i]); }).wait_and_throw();
493     } break;
494     case CEED_NORM_MAX: {
495       // Order queue
496       sycl::event e            = data->sycl_queue.ext_oneapi_submit_barrier();
497       auto        maxReduction = sycl::reduction(impl->reduction_norm, sycl::maximum<>(), {sycl::property::reduction::initialize_to_identity{}});
498       data->sycl_queue.parallel_for(length, {e}, maxReduction, [=](sycl::id<1> i, auto &max) { max.combine(abs(d_array[i])); }).wait_and_throw();
499     } break;
500   }
501   // L2 norm - square root over reduced value
502   if (type == CEED_NORM_2) *norm = sqrt(*impl->reduction_norm);
503   else *norm = *impl->reduction_norm;
504   CeedCallBackend(CeedVectorRestoreArrayRead(vec, &d_array));
505   return CEED_ERROR_SUCCESS;
506 }
507 
508 //------------------------------------------------------------------------------
509 // Take reciprocal of a vector on host
510 //------------------------------------------------------------------------------
511 static int CeedHostReciprocal_Sycl(CeedScalar *h_array, CeedSize length) {
512   for (CeedSize i = 0; i < length; i++) {
513     if (std::fabs(h_array[i]) > CEED_EPSILON) h_array[i] = 1. / h_array[i];
514   }
515   return CEED_ERROR_SUCCESS;
516 }
517 
518 //------------------------------------------------------------------------------
519 // Take reciprocal of a vector on device
520 //------------------------------------------------------------------------------
521 static int CeedDeviceReciprocal_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length) {
522   // Order queue
523   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
524   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) {
525     if (std::fabs(d_array[i]) > CEED_EPSILON) d_array[i] = 1. / d_array[i];
526   });
527   return CEED_ERROR_SUCCESS;
528 }
529 
530 //------------------------------------------------------------------------------
531 // Take reciprocal of a vector
532 //------------------------------------------------------------------------------
533 static int CeedVectorReciprocal_Sycl(CeedVector vec) {
534   Ceed             ceed;
535   Ceed_Sycl       *data;
536   CeedSize         length;
537   CeedVector_Sycl *impl;
538 
539   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
540   CeedCallBackend(CeedVectorGetData(vec, &impl));
541   CeedCallBackend(CeedVectorGetLength(vec, &length));
542   CeedCallBackend(CeedGetData(ceed, &data));
543 
544   // Set value for synced device/host array
545   if (impl->d_array) CeedCallBackend(CeedDeviceReciprocal_Sycl(data->sycl_queue, impl->d_array, length));
546   if (impl->h_array) CeedCallBackend(CeedHostReciprocal_Sycl(impl->h_array, length));
547   return CEED_ERROR_SUCCESS;
548 }
549 
550 //------------------------------------------------------------------------------
551 // Compute x = alpha x on the host
552 //------------------------------------------------------------------------------
553 static int CeedHostScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
554   for (CeedSize i = 0; i < length; i++) x_array[i] *= alpha;
555   return CEED_ERROR_SUCCESS;
556 }
557 
558 //------------------------------------------------------------------------------
559 // Compute x = alpha x on device
560 //------------------------------------------------------------------------------
561 static int CeedDeviceScale_Sycl(sycl::queue &sycl_queue, CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
562   // Order queue
563   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
564   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) { x_array[i] *= alpha; });
565   return CEED_ERROR_SUCCESS;
566 }
567 
568 //------------------------------------------------------------------------------
569 // Compute x = alpha x
570 //------------------------------------------------------------------------------
571 static int CeedVectorScale_Sycl(CeedVector x, CeedScalar alpha) {
572   Ceed             ceed;
573   Ceed_Sycl       *data;
574   CeedSize         length;
575   CeedVector_Sycl *x_impl;
576 
577   CeedCallBackend(CeedVectorGetCeed(x, &ceed));
578   CeedCallBackend(CeedVectorGetData(x, &x_impl));
579   CeedCallBackend(CeedVectorGetLength(x, &length));
580   CeedCallBackend(CeedGetData(ceed, &data));
581 
582   // Set value for synced device/host array
583   if (x_impl->d_array) CeedCallBackend(CeedDeviceScale_Sycl(data->sycl_queue, x_impl->d_array, alpha, length));
584   if (x_impl->h_array) CeedCallBackend(CeedHostScale_Sycl(x_impl->h_array, alpha, length));
585   return CEED_ERROR_SUCCESS;
586 }
587 
588 //------------------------------------------------------------------------------
589 // Compute y = alpha x + y on the host
590 //------------------------------------------------------------------------------
591 static int CeedHostAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
592   for (CeedSize i = 0; i < length; i++) y_array[i] += alpha * x_array[i];
593   return CEED_ERROR_SUCCESS;
594 }
595 
596 //------------------------------------------------------------------------------
597 // Compute y = alpha x + y on device
598 //------------------------------------------------------------------------------
599 static int CeedDeviceAXPY_Sycl(sycl::queue &sycl_queue, CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
600   // Order queue
601   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
602   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) { y_array[i] += alpha * x_array[i]; });
603   return CEED_ERROR_SUCCESS;
604 }
605 
606 //------------------------------------------------------------------------------
607 // Compute y = alpha x + y
608 //------------------------------------------------------------------------------
609 static int CeedVectorAXPY_Sycl(CeedVector y, CeedScalar alpha, CeedVector x) {
610   Ceed             ceed;
611   Ceed_Sycl       *data;
612   CeedSize         length;
613   CeedVector_Sycl *y_impl, *x_impl;
614 
615   CeedCallBackend(CeedVectorGetCeed(y, &ceed));
616   CeedCallBackend(CeedVectorGetData(y, &y_impl));
617   CeedCallBackend(CeedVectorGetData(x, &x_impl));
618   CeedCallBackend(CeedVectorGetLength(y, &length));
619   CeedCallBackend(CeedGetData(ceed, &data));
620 
621   // Set value for synced device/host array
622   if (y_impl->d_array) {
623     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
624     CeedCallBackend(CeedDeviceAXPY_Sycl(data->sycl_queue, y_impl->d_array, alpha, x_impl->d_array, length));
625   }
626   if (y_impl->h_array) {
627     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
628     CeedCallBackend(CeedHostAXPY_Sycl(y_impl->h_array, alpha, x_impl->h_array, length));
629   }
630   return CEED_ERROR_SUCCESS;
631 }
632 
633 //------------------------------------------------------------------------------
634 // Compute the pointwise multiplication w = x .* y on the host
635 //------------------------------------------------------------------------------
636 static int CeedHostPointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
637   for (CeedSize i = 0; i < length; i++) w_array[i] = x_array[i] * y_array[i];
638   return CEED_ERROR_SUCCESS;
639 }
640 
641 //------------------------------------------------------------------------------
642 // Compute the pointwise multiplication w = x .* y on device (impl in .cu file)
643 //------------------------------------------------------------------------------
644 static int CeedDevicePointwiseMult_Sycl(sycl::queue &sycl_queue, CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
645   // Order queue
646   sycl::event e = sycl_queue.ext_oneapi_submit_barrier();
647   sycl_queue.parallel_for(length, {e}, [=](sycl::id<1> i) { w_array[i] = x_array[i] * y_array[i]; });
648   return CEED_ERROR_SUCCESS;
649 }
650 
651 //------------------------------------------------------------------------------
652 // Compute the pointwise multiplication w = x .* y
653 //------------------------------------------------------------------------------
654 static int CeedVectorPointwiseMult_Sycl(CeedVector w, CeedVector x, CeedVector y) {
655   Ceed             ceed;
656   Ceed_Sycl       *data;
657   CeedSize         length;
658   CeedVector_Sycl *w_impl, *x_impl, *y_impl;
659 
660   CeedCallBackend(CeedVectorGetCeed(w, &ceed));
661   CeedCallBackend(CeedVectorGetData(w, &w_impl));
662   CeedCallBackend(CeedVectorGetData(x, &x_impl));
663   CeedCallBackend(CeedVectorGetData(y, &y_impl));
664   CeedCallBackend(CeedVectorGetLength(w, &length));
665   CeedCallBackend(CeedGetData(ceed, &data));
666 
667   // Set value for synced device/host array
668   if (!w_impl->d_array && !w_impl->h_array) {
669     CeedCallBackend(CeedVectorSetValue(w, 0.0));
670   }
671   if (w_impl->d_array) {
672     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
673     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_DEVICE));
674     CeedCallBackend(CeedDevicePointwiseMult_Sycl(data->sycl_queue, w_impl->d_array, x_impl->d_array, y_impl->d_array, length));
675   }
676   if (w_impl->h_array) {
677     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
678     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_HOST));
679     CeedCallBackend(CeedHostPointwiseMult_Sycl(w_impl->h_array, x_impl->h_array, y_impl->h_array, length));
680   }
681   return CEED_ERROR_SUCCESS;
682 }
683 
684 //------------------------------------------------------------------------------
685 // Destroy the vector
686 //------------------------------------------------------------------------------
687 static int CeedVectorDestroy_Sycl(const CeedVector vec) {
688   Ceed             ceed;
689   Ceed_Sycl       *data;
690   CeedVector_Sycl *impl;
691 
692   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
693   CeedCallBackend(CeedVectorGetData(vec, &impl));
694   CeedCallBackend(CeedGetData(ceed, &data));
695 
696   // Wait for all work to finish before freeing memory
697   CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
698   CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
699   CeedCallSycl(ceed, sycl::free(impl->reduction_norm, data->sycl_context));
700 
701   CeedCallBackend(CeedFree(&impl->h_array_owned));
702   CeedCallBackend(CeedFree(&impl));
703   return CEED_ERROR_SUCCESS;
704 }
705 
706 //------------------------------------------------------------------------------
707 // Create a vector of the specified length (does not allocate memory)
708 //------------------------------------------------------------------------------
709 int CeedVectorCreate_Sycl(CeedSize n, CeedVector vec) {
710   Ceed             ceed;
711   Ceed_Sycl       *data;
712   CeedVector_Sycl *impl;
713 
714   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
715   CeedCallBackend(CeedGetData(ceed, &data));
716   CeedCallBackend(CeedCalloc(1, &impl));
717   CeedCallSycl(ceed, impl->reduction_norm = sycl::malloc_host<CeedScalar>(1, data->sycl_context));
718   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasValidArray", CeedVectorHasValidArray_Sycl));
719   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasBorrowedArrayOfType", CeedVectorHasBorrowedArrayOfType_Sycl));
720   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Sycl));
721   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "TakeArray", CeedVectorTakeArray_Sycl));
722   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetValue", CeedVectorSetValue_Sycl));
723   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SyncArray", CeedVectorSyncArray_Sycl));
724   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArray", CeedVectorGetArray_Sycl));
725   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Sycl));
726   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayWrite", CeedVectorGetArrayWrite_Sycl));
727   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Norm", CeedVectorNorm_Sycl));
728   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Reciprocal", CeedVectorReciprocal_Sycl));
729   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "AXPY", CeedVectorAXPY_Sycl));
730   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Scale", CeedVectorScale_Sycl));
731   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "PointwiseMult", CeedVectorPointwiseMult_Sycl));
732   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Sycl));
733   CeedCallBackend(CeedVectorSetData(vec, impl));
734   return CEED_ERROR_SUCCESS;
735 }
736