xref: /libCEED/rust/libceed-sys/c-src/backends/sycl-ref/ceed-sycl-vector.sycl.cpp (revision 9bc663991d6482bcb1d60b1f116148f11db83fa1)
1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/backend.h>
9 #include <ceed/ceed.h>
10 
11 #include <cmath>
12 #include <string>
13 #include <sycl/sycl.hpp>
14 
15 #include "ceed-sycl-ref.hpp"
16 
17 //------------------------------------------------------------------------------
18 // Check if host/device sync is needed
19 //------------------------------------------------------------------------------
20 static inline int CeedVectorNeedSync_Sycl(const CeedVector vec, CeedMemType mem_type, bool *need_sync) {
21   bool             has_valid_array = false;
22   CeedVector_Sycl *impl;
23 
24   CeedCallBackend(CeedVectorGetData(vec, &impl));
25   CeedCallBackend(CeedVectorHasValidArray(vec, &has_valid_array));
26   switch (mem_type) {
27     case CEED_MEM_HOST:
28       *need_sync = has_valid_array && !impl->h_array;
29       break;
30     case CEED_MEM_DEVICE:
31       *need_sync = has_valid_array && !impl->d_array;
32       break;
33   }
34   return CEED_ERROR_SUCCESS;
35 }
36 
37 //------------------------------------------------------------------------------
38 // Sync host to device
39 //------------------------------------------------------------------------------
40 static inline int CeedVectorSyncH2D_Sycl(const CeedVector vec) {
41   Ceed             ceed;
42   Ceed_Sycl       *data;
43   CeedSize         length;
44   CeedVector_Sycl *impl;
45 
46   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
47   CeedCallBackend(CeedGetData(ceed, &data));
48   CeedCallBackend(CeedVectorGetData(vec, &impl));
49 
50   CeedCheck(impl->h_array, ceed, CEED_ERROR_BACKEND, "No valid host data to sync to device");
51 
52   CeedCallBackend(CeedVectorGetLength(vec, &length));
53   if (impl->d_array_borrowed) {
54     impl->d_array = impl->d_array_borrowed;
55   } else if (impl->d_array_owned) {
56     impl->d_array = impl->d_array_owned;
57   } else {
58     CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context));
59     impl->d_array = impl->d_array_owned;
60   }
61 
62   // Copy from host to device
63   std::vector<sycl::event> e;
64 
65   if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()};
66   CeedCallSycl(ceed, data->sycl_queue.copy<CeedScalar>(impl->h_array, impl->d_array, length, e).wait_and_throw());
67   CeedCallBackend(CeedDestroy(&ceed));
68   return CEED_ERROR_SUCCESS;
69 }
70 
71 //------------------------------------------------------------------------------
72 // Sync device to host
73 //------------------------------------------------------------------------------
74 static inline int CeedVectorSyncD2H_Sycl(const CeedVector vec) {
75   Ceed             ceed;
76   Ceed_Sycl       *data;
77   CeedSize         length;
78   CeedVector_Sycl *impl;
79 
80   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
81   CeedCallBackend(CeedGetData(ceed, &data));
82   CeedCallBackend(CeedVectorGetData(vec, &impl));
83 
84   CeedCheck(impl->d_array, ceed, CEED_ERROR_BACKEND, "No valid device data to sync to host");
85 
86   CeedCallBackend(CeedVectorGetLength(vec, &length));
87   if (impl->h_array_borrowed) {
88     impl->h_array = impl->h_array_borrowed;
89   } else if (impl->h_array_owned) {
90     impl->h_array = impl->h_array_owned;
91   } else {
92     CeedCallBackend(CeedCalloc(length, &impl->h_array_owned));
93     impl->h_array = impl->h_array_owned;
94   }
95 
96   // Copy from device to host
97   std::vector<sycl::event> e;
98 
99   if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()};
100   CeedCallSycl(ceed, data->sycl_queue.copy<CeedScalar>(impl->d_array, impl->h_array, length, e).wait_and_throw());
101   CeedCallBackend(CeedDestroy(&ceed));
102   return CEED_ERROR_SUCCESS;
103 }
104 
105 //------------------------------------------------------------------------------
106 // Sync arrays
107 //------------------------------------------------------------------------------
108 static int CeedVectorSyncArray_Sycl(const CeedVector vec, CeedMemType mem_type) {
109   bool need_sync = false;
110 
111   // Check whether device/host sync is needed
112   CeedCallBackend(CeedVectorNeedSync_Sycl(vec, mem_type, &need_sync));
113   if (!need_sync) return CEED_ERROR_SUCCESS;
114 
115   switch (mem_type) {
116     case CEED_MEM_HOST:
117       return CeedVectorSyncD2H_Sycl(vec);
118     case CEED_MEM_DEVICE:
119       return CeedVectorSyncH2D_Sycl(vec);
120   }
121   return CEED_ERROR_UNSUPPORTED;
122 }
123 
124 //------------------------------------------------------------------------------
125 // Set all pointers as invalid
126 //------------------------------------------------------------------------------
127 static inline int CeedVectorSetAllInvalid_Sycl(const CeedVector vec) {
128   CeedVector_Sycl *impl;
129 
130   CeedCallBackend(CeedVectorGetData(vec, &impl));
131   impl->h_array = NULL;
132   impl->d_array = NULL;
133   return CEED_ERROR_SUCCESS;
134 }
135 
136 //------------------------------------------------------------------------------
137 // Check if CeedVector has any valid pointer
138 //------------------------------------------------------------------------------
139 static inline int CeedVectorHasValidArray_Sycl(const CeedVector vec, bool *has_valid_array) {
140   CeedVector_Sycl *impl;
141 
142   CeedCallBackend(CeedVectorGetData(vec, &impl));
143   *has_valid_array = impl->h_array || impl->d_array;
144   return CEED_ERROR_SUCCESS;
145 }
146 
147 //------------------------------------------------------------------------------
148 // Check if has array of given type
149 //------------------------------------------------------------------------------
150 static inline int CeedVectorHasArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_array_of_type) {
151   CeedVector_Sycl *impl;
152 
153   CeedCallBackend(CeedVectorGetData(vec, &impl));
154   switch (mem_type) {
155     case CEED_MEM_HOST:
156       *has_array_of_type = impl->h_array_borrowed || impl->h_array_owned;
157       break;
158     case CEED_MEM_DEVICE:
159       *has_array_of_type = impl->d_array_borrowed || impl->d_array_owned;
160       break;
161   }
162   return CEED_ERROR_SUCCESS;
163 }
164 
165 //------------------------------------------------------------------------------
166 // Check if has borrowed array of given type
167 //------------------------------------------------------------------------------
168 static inline int CeedVectorHasBorrowedArrayOfType_Sycl(const CeedVector vec, CeedMemType mem_type, bool *has_borrowed_array_of_type) {
169   CeedVector_Sycl *impl;
170 
171   CeedCallBackend(CeedVectorGetData(vec, &impl));
172   switch (mem_type) {
173     case CEED_MEM_HOST:
174       *has_borrowed_array_of_type = impl->h_array_borrowed;
175       break;
176     case CEED_MEM_DEVICE:
177       *has_borrowed_array_of_type = impl->d_array_borrowed;
178       break;
179   }
180   return CEED_ERROR_SUCCESS;
181 }
182 
183 //------------------------------------------------------------------------------
184 // Set array from host
185 //------------------------------------------------------------------------------
186 static int CeedVectorSetArrayHost_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
187   CeedSize         length;
188   CeedVector_Sycl *impl;
189 
190   CeedCallBackend(CeedVectorGetData(vec, &impl));
191   CeedCallBackend(CeedVectorGetLength(vec, &length));
192 
193   CeedCallBackend(CeedSetHostCeedScalarArray(array, copy_mode, length, (const CeedScalar **)&impl->h_array_owned,
194                                              (const CeedScalar **)&impl->h_array_borrowed, (const CeedScalar **)&impl->h_array));
195   return CEED_ERROR_SUCCESS;
196 }
197 
198 //------------------------------------------------------------------------------
199 // Set array from device
200 //------------------------------------------------------------------------------
201 static int CeedVectorSetArrayDevice_Sycl(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
202   CeedSize         length;
203   Ceed             ceed;
204   Ceed_Sycl       *data;
205   CeedVector_Sycl *impl;
206 
207   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
208   CeedCallBackend(CeedVectorGetData(vec, &impl));
209   CeedCallBackend(CeedGetData(ceed, &data));
210   CeedCallBackend(CeedVectorGetLength(vec, &length));
211 
212   // Order queue if needed.
213   std::vector<sycl::event> e;
214 
215   if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()};
216 
217   switch (copy_mode) {
218     case CEED_COPY_VALUES: {
219       if (!impl->d_array_owned) {
220         CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device, data->sycl_context));
221       }
222       if (array) {
223         // Wait for copy to finish and handle exceptions.
224         CeedCallSycl(ceed, data->sycl_queue.copy<CeedScalar>(array, impl->d_array_owned, length, e).wait_and_throw());
225       }
226       impl->d_array_borrowed = NULL;
227       impl->d_array          = impl->d_array_owned;
228     } break;
229     case CEED_OWN_POINTER:
230       if (impl->d_array_owned) {
231         // Wait for all work to finish before freeing memory
232         CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
233         CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
234       }
235       impl->d_array_owned    = array;
236       impl->d_array_borrowed = NULL;
237       impl->d_array          = impl->d_array_owned;
238       break;
239     case CEED_USE_POINTER:
240       if (impl->d_array_owned) {
241         // Wait for all work to finish before freeing memory
242         CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
243         CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
244       }
245       impl->d_array_owned    = NULL;
246       impl->d_array_borrowed = array;
247       impl->d_array          = impl->d_array_borrowed;
248       break;
249   }
250   CeedCallBackend(CeedDestroy(&ceed));
251   return CEED_ERROR_SUCCESS;
252 }
253 
254 //------------------------------------------------------------------------------
255 // Set the array used by a vector,
256 //   freeing any previously allocated array if applicable
257 //------------------------------------------------------------------------------
258 static int CeedVectorSetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedCopyMode copy_mode, CeedScalar *array) {
259   CeedVector_Sycl *impl;
260 
261   CeedCallBackend(CeedVectorGetData(vec, &impl));
262 
263   CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec));
264   switch (mem_type) {
265     case CEED_MEM_HOST:
266       return CeedVectorSetArrayHost_Sycl(vec, copy_mode, array);
267     case CEED_MEM_DEVICE:
268       return CeedVectorSetArrayDevice_Sycl(vec, copy_mode, array);
269   }
270   return CEED_ERROR_UNSUPPORTED;
271 }
272 
273 //------------------------------------------------------------------------------
274 // Set host array to value
275 //------------------------------------------------------------------------------
276 static int CeedHostSetValue_Sycl(CeedScalar *h_array, CeedSize length, CeedScalar val) {
277   for (CeedSize i = 0; i < length; i++) h_array[i] = val;
278   return CEED_ERROR_SUCCESS;
279 }
280 
281 //------------------------------------------------------------------------------
282 // Set device array to value
283 //------------------------------------------------------------------------------
284 static int CeedDeviceSetValue_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length, CeedScalar val) {
285   std::vector<sycl::event> e;
286 
287   if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
288   sycl_queue.fill(d_array, val, length, e);
289   return CEED_ERROR_SUCCESS;
290 }
291 
292 //------------------------------------------------------------------------------
293 // Set a vector to a value,
294 //------------------------------------------------------------------------------
295 static int CeedVectorSetValue_Sycl(CeedVector vec, CeedScalar val) {
296   Ceed             ceed;
297   Ceed_Sycl       *data;
298   CeedSize         length;
299   CeedVector_Sycl *impl;
300 
301   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
302   CeedCallBackend(CeedGetData(ceed, &data));
303   CeedCallBackend(CeedDestroy(&ceed));
304   CeedCallBackend(CeedVectorGetData(vec, &impl));
305   CeedCallBackend(CeedVectorGetLength(vec, &length));
306 
307   // Set value for synced device/host array
308   if (!impl->d_array && !impl->h_array) {
309     if (impl->d_array_borrowed) {
310       impl->d_array = impl->d_array_borrowed;
311     } else if (impl->h_array_borrowed) {
312       impl->h_array = impl->h_array_borrowed;
313     } else if (impl->d_array_owned) {
314       impl->d_array = impl->d_array_owned;
315     } else if (impl->h_array_owned) {
316       impl->h_array = impl->h_array_owned;
317     } else {
318       CeedCallBackend(CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL));
319     }
320   }
321   if (impl->d_array) {
322     CeedCallBackend(CeedDeviceSetValue_Sycl(data->sycl_queue, impl->d_array, length, val));
323     impl->h_array = NULL;
324   }
325   if (impl->h_array) {
326     CeedCallBackend(CeedHostSetValue_Sycl(impl->h_array, length, val));
327     impl->d_array = NULL;
328   }
329   return CEED_ERROR_SUCCESS;
330 }
331 
332 //------------------------------------------------------------------------------
333 // Vector Take Array
334 //------------------------------------------------------------------------------
335 static int CeedVectorTakeArray_Sycl(CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
336   Ceed             ceed;
337   Ceed_Sycl       *data;
338   CeedVector_Sycl *impl;
339 
340   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
341   CeedCallBackend(CeedGetData(ceed, &data));
342   CeedCallBackend(CeedVectorGetData(vec, &impl));
343   CeedCallBackend(CeedDestroy(&ceed));
344   CeedCallBackend(CeedVectorGetData(vec, &impl));
345 
346   // Order queue if needed
347   if (!data->sycl_queue.is_in_order()) data->sycl_queue.ext_oneapi_submit_barrier();
348 
349   // Sync array to requested mem_type
350   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
351 
352   // Update pointer
353   switch (mem_type) {
354     case CEED_MEM_HOST:
355       (*array)               = impl->h_array_borrowed;
356       impl->h_array_borrowed = NULL;
357       impl->h_array          = NULL;
358       break;
359     case CEED_MEM_DEVICE:
360       (*array)               = impl->d_array_borrowed;
361       impl->d_array_borrowed = NULL;
362       impl->d_array          = NULL;
363       break;
364   }
365   return CEED_ERROR_SUCCESS;
366 }
367 
368 //------------------------------------------------------------------------------
369 // Core logic for array syncronization for GetArray.
370 //   If a different memory type is most up to date, this will perform a copy
371 //------------------------------------------------------------------------------
372 static int CeedVectorGetArrayCore_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
373   CeedVector_Sycl *impl;
374 
375   CeedCallBackend(CeedVectorGetData(vec, &impl));
376 
377   // Sync array to requested mem_type
378   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
379 
380   // Update pointer
381   switch (mem_type) {
382     case CEED_MEM_HOST:
383       *array = impl->h_array;
384       break;
385     case CEED_MEM_DEVICE:
386       *array = impl->d_array;
387       break;
388   }
389   return CEED_ERROR_SUCCESS;
390 }
391 
392 //------------------------------------------------------------------------------
393 // Get read-only access to a vector via the specified mem_type
394 //------------------------------------------------------------------------------
395 static int CeedVectorGetArrayRead_Sycl(const CeedVector vec, const CeedMemType mem_type, const CeedScalar **array) {
396   return CeedVectorGetArrayCore_Sycl(vec, mem_type, (CeedScalar **)array);
397 }
398 
399 //------------------------------------------------------------------------------
400 // Get read/write access to a vector via the specified mem_type
401 //------------------------------------------------------------------------------
402 static int CeedVectorGetArray_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
403   CeedVector_Sycl *impl;
404 
405   CeedCallBackend(CeedVectorGetData(vec, &impl));
406   CeedCallBackend(CeedVectorGetArrayCore_Sycl(vec, mem_type, array));
407   CeedCallBackend(CeedVectorSetAllInvalid_Sycl(vec));
408   switch (mem_type) {
409     case CEED_MEM_HOST:
410       impl->h_array = *array;
411       break;
412     case CEED_MEM_DEVICE:
413       impl->d_array = *array;
414       break;
415   }
416   return CEED_ERROR_SUCCESS;
417 }
418 
419 //------------------------------------------------------------------------------
420 // Get write access to a vector via the specified mem_type
421 //------------------------------------------------------------------------------
422 static int CeedVectorGetArrayWrite_Sycl(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
423   bool             has_array_of_type = true;
424   CeedVector_Sycl *impl;
425 
426   CeedCallBackend(CeedVectorGetData(vec, &impl));
427   CeedCallBackend(CeedVectorHasArrayOfType_Sycl(vec, mem_type, &has_array_of_type));
428   if (!has_array_of_type) {
429     // Allocate if array is not yet allocated
430     CeedCallBackend(CeedVectorSetArray(vec, mem_type, CEED_COPY_VALUES, NULL));
431   } else {
432     // Select dirty array
433     switch (mem_type) {
434       case CEED_MEM_HOST:
435         if (impl->h_array_borrowed) impl->h_array = impl->h_array_borrowed;
436         else impl->h_array = impl->h_array_owned;
437         break;
438       case CEED_MEM_DEVICE:
439         if (impl->d_array_borrowed) impl->d_array = impl->d_array_borrowed;
440         else impl->d_array = impl->d_array_owned;
441     }
442   }
443   return CeedVectorGetArray_Sycl(vec, mem_type, array);
444 }
445 
446 //------------------------------------------------------------------------------
447 // Get the norm of a CeedVector
448 //------------------------------------------------------------------------------
449 static int CeedVectorNorm_Sycl(CeedVector vec, CeedNormType type, CeedScalar *norm) {
450   Ceed              ceed;
451   Ceed_Sycl        *data;
452   CeedSize          length;
453   const CeedScalar *d_array;
454   CeedVector_Sycl  *impl;
455 
456   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
457   CeedCallBackend(CeedGetData(ceed, &data));
458   CeedCallBackend(CeedDestroy(&ceed));
459   CeedCallBackend(CeedVectorGetData(vec, &impl));
460   CeedCallBackend(CeedVectorGetLength(vec, &length));
461 
462   // Compute norm
463   CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &d_array));
464 
465   std::vector<sycl::event> e;
466 
467   if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()};
468 
469   switch (type) {
470     case CEED_NORM_1: {
471       // Order queue
472       auto sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}});
473       data->sycl_queue.parallel_for(length, e, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += abs(d_array[i]); }).wait_and_throw();
474     } break;
475     case CEED_NORM_2: {
476       // Order queue
477       auto sumReduction = sycl::reduction(impl->reduction_norm, sycl::plus<>(), {sycl::property::reduction::initialize_to_identity{}});
478       data->sycl_queue.parallel_for(length, e, sumReduction, [=](sycl::id<1> i, auto &sum) { sum += (d_array[i] * d_array[i]); }).wait_and_throw();
479     } break;
480     case CEED_NORM_MAX: {
481       // Order queue
482       auto maxReduction = sycl::reduction(impl->reduction_norm, sycl::maximum<>(), {sycl::property::reduction::initialize_to_identity{}});
483       data->sycl_queue.parallel_for(length, e, maxReduction, [=](sycl::id<1> i, auto &max) { max.combine(abs(d_array[i])); }).wait_and_throw();
484     } break;
485   }
486   // L2 norm - square root over reduced value
487   if (type == CEED_NORM_2) *norm = sqrt(*impl->reduction_norm);
488   else *norm = *impl->reduction_norm;
489   CeedCallBackend(CeedVectorRestoreArrayRead(vec, &d_array));
490   return CEED_ERROR_SUCCESS;
491 }
492 
493 //------------------------------------------------------------------------------
494 // Take reciprocal of a vector on host
495 //------------------------------------------------------------------------------
496 static int CeedHostReciprocal_Sycl(CeedScalar *h_array, CeedSize length) {
497   for (CeedSize i = 0; i < length; i++) {
498     if (std::fabs(h_array[i]) > CEED_EPSILON) h_array[i] = 1. / h_array[i];
499   }
500   return CEED_ERROR_SUCCESS;
501 }
502 
503 //------------------------------------------------------------------------------
504 // Take reciprocal of a vector on device
505 //------------------------------------------------------------------------------
506 static int CeedDeviceReciprocal_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length) {
507   std::vector<sycl::event> e;
508 
509   if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
510   sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) {
511     if (std::fabs(d_array[i]) > CEED_EPSILON) d_array[i] = 1. / d_array[i];
512   });
513   return CEED_ERROR_SUCCESS;
514 }
515 
516 //------------------------------------------------------------------------------
517 // Take reciprocal of a vector
518 //------------------------------------------------------------------------------
519 static int CeedVectorReciprocal_Sycl(CeedVector vec) {
520   Ceed             ceed;
521   Ceed_Sycl       *data;
522   CeedSize         length;
523   CeedVector_Sycl *impl;
524 
525   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
526   CeedCallBackend(CeedGetData(ceed, &data));
527   CeedCallBackend(CeedDestroy(&ceed));
528   CeedCallBackend(CeedVectorGetData(vec, &impl));
529   CeedCallBackend(CeedVectorGetLength(vec, &length));
530 
531   // Set value for synced device/host array
532   if (impl->d_array) CeedCallBackend(CeedDeviceReciprocal_Sycl(data->sycl_queue, impl->d_array, length));
533   if (impl->h_array) CeedCallBackend(CeedHostReciprocal_Sycl(impl->h_array, length));
534   return CEED_ERROR_SUCCESS;
535 }
536 
537 //------------------------------------------------------------------------------
538 // Compute x = alpha x on the host
539 //------------------------------------------------------------------------------
540 static int CeedHostScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
541   for (CeedSize i = 0; i < length; i++) x_array[i] *= alpha;
542   return CEED_ERROR_SUCCESS;
543 }
544 
545 //------------------------------------------------------------------------------
546 // Compute x = alpha x on device
547 //------------------------------------------------------------------------------
548 static int CeedDeviceScale_Sycl(sycl::queue &sycl_queue, CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
549   std::vector<sycl::event> e;
550 
551   if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
552   sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) { x_array[i] *= alpha; });
553   return CEED_ERROR_SUCCESS;
554 }
555 
556 //------------------------------------------------------------------------------
557 // Compute x = alpha x
558 //------------------------------------------------------------------------------
559 static int CeedVectorScale_Sycl(CeedVector x, CeedScalar alpha) {
560   Ceed             ceed;
561   Ceed_Sycl       *data;
562   CeedSize         length;
563   CeedVector_Sycl *x_impl;
564 
565   CeedCallBackend(CeedVectorGetCeed(x, &ceed));
566   CeedCallBackend(CeedGetData(ceed, &data));
567   CeedCallBackend(CeedDestroy(&ceed));
568   CeedCallBackend(CeedVectorGetData(x, &x_impl));
569   CeedCallBackend(CeedVectorGetLength(x, &length));
570 
571   // Set value for synced device/host array
572   if (x_impl->d_array) CeedCallBackend(CeedDeviceScale_Sycl(data->sycl_queue, x_impl->d_array, alpha, length));
573   if (x_impl->h_array) CeedCallBackend(CeedHostScale_Sycl(x_impl->h_array, alpha, length));
574   return CEED_ERROR_SUCCESS;
575 }
576 
577 //------------------------------------------------------------------------------
578 // Compute y = alpha x + y on the host
579 //------------------------------------------------------------------------------
580 static int CeedHostAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
581   for (CeedSize i = 0; i < length; i++) y_array[i] += alpha * x_array[i];
582   return CEED_ERROR_SUCCESS;
583 }
584 
585 //------------------------------------------------------------------------------
586 // Compute y = alpha x + y on device
587 //------------------------------------------------------------------------------
588 static int CeedDeviceAXPY_Sycl(sycl::queue &sycl_queue, CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
589   std::vector<sycl::event> e;
590 
591   if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
592   sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) { y_array[i] += alpha * x_array[i]; });
593   return CEED_ERROR_SUCCESS;
594 }
595 
596 //------------------------------------------------------------------------------
597 // Compute y = alpha x + y
598 //------------------------------------------------------------------------------
599 static int CeedVectorAXPY_Sycl(CeedVector y, CeedScalar alpha, CeedVector x) {
600   Ceed             ceed;
601   Ceed_Sycl       *data;
602   CeedSize         length;
603   CeedVector_Sycl *y_impl, *x_impl;
604 
605   CeedCallBackend(CeedVectorGetCeed(y, &ceed));
606   CeedCallBackend(CeedGetData(ceed, &data));
607   CeedCallBackend(CeedDestroy(&ceed));
608   CeedCallBackend(CeedVectorGetData(y, &y_impl));
609   CeedCallBackend(CeedVectorGetData(x, &x_impl));
610   CeedCallBackend(CeedVectorGetLength(y, &length));
611 
612   // Set value for synced device/host array
613   if (y_impl->d_array) {
614     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
615     CeedCallBackend(CeedDeviceAXPY_Sycl(data->sycl_queue, y_impl->d_array, alpha, x_impl->d_array, length));
616   }
617   if (y_impl->h_array) {
618     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
619     CeedCallBackend(CeedHostAXPY_Sycl(y_impl->h_array, alpha, x_impl->h_array, length));
620   }
621   return CEED_ERROR_SUCCESS;
622 }
623 
624 //------------------------------------------------------------------------------
625 // Compute the pointwise multiplication w = x .* y on the host
626 //------------------------------------------------------------------------------
627 static int CeedHostPointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
628   for (CeedSize i = 0; i < length; i++) w_array[i] = x_array[i] * y_array[i];
629   return CEED_ERROR_SUCCESS;
630 }
631 
632 //------------------------------------------------------------------------------
633 // Compute the pointwise multiplication w = x .* y on device (impl in .cu file)
634 //------------------------------------------------------------------------------
635 static int CeedDevicePointwiseMult_Sycl(sycl::queue &sycl_queue, CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
636   std::vector<sycl::event> e;
637 
638   if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
639   sycl_queue.parallel_for(length, e, [=](sycl::id<1> i) { w_array[i] = x_array[i] * y_array[i]; });
640   return CEED_ERROR_SUCCESS;
641 }
642 
643 //------------------------------------------------------------------------------
644 // Compute the pointwise multiplication w = x .* y
645 //------------------------------------------------------------------------------
646 static int CeedVectorPointwiseMult_Sycl(CeedVector w, CeedVector x, CeedVector y) {
647   Ceed             ceed;
648   Ceed_Sycl       *data;
649   CeedSize         length;
650   CeedVector_Sycl *w_impl, *x_impl, *y_impl;
651 
652   CeedCallBackend(CeedVectorGetCeed(w, &ceed));
653   CeedCallBackend(CeedGetData(ceed, &data));
654   CeedCallBackend(CeedDestroy(&ceed));
655   CeedCallBackend(CeedVectorGetData(w, &w_impl));
656   CeedCallBackend(CeedVectorGetData(x, &x_impl));
657   CeedCallBackend(CeedVectorGetData(y, &y_impl));
658   CeedCallBackend(CeedVectorGetLength(w, &length));
659 
660   // Set value for synced device/host array
661   if (!w_impl->d_array && !w_impl->h_array) {
662     CeedCallBackend(CeedVectorSetValue(w, 0.0));
663   }
664   if (w_impl->d_array) {
665     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
666     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_DEVICE));
667     CeedCallBackend(CeedDevicePointwiseMult_Sycl(data->sycl_queue, w_impl->d_array, x_impl->d_array, y_impl->d_array, length));
668   }
669   if (w_impl->h_array) {
670     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
671     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_HOST));
672     CeedCallBackend(CeedHostPointwiseMult_Sycl(w_impl->h_array, x_impl->h_array, y_impl->h_array, length));
673   }
674   return CEED_ERROR_SUCCESS;
675 }
676 
677 //------------------------------------------------------------------------------
678 // Destroy the vector
679 //------------------------------------------------------------------------------
680 static int CeedVectorDestroy_Sycl(const CeedVector vec) {
681   Ceed             ceed;
682   Ceed_Sycl       *data;
683   CeedVector_Sycl *impl;
684 
685   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
686   CeedCallBackend(CeedVectorGetData(vec, &impl));
687   CeedCallBackend(CeedGetData(ceed, &data));
688 
689   // Wait for all work to finish before freeing memory
690   CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
691   CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context));
692   CeedCallSycl(ceed, sycl::free(impl->reduction_norm, data->sycl_context));
693 
694   CeedCallBackend(CeedFree(&impl->h_array_owned));
695   CeedCallBackend(CeedFree(&impl));
696   CeedCallBackend(CeedDestroy(&ceed));
697   return CEED_ERROR_SUCCESS;
698 }
699 
700 //------------------------------------------------------------------------------
701 // Create a vector of the specified length (does not allocate memory)
702 //------------------------------------------------------------------------------
703 int CeedVectorCreate_Sycl(CeedSize n, CeedVector vec) {
704   Ceed             ceed;
705   Ceed_Sycl       *data;
706   CeedVector_Sycl *impl;
707 
708   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
709   CeedCallBackend(CeedGetData(ceed, &data));
710   CeedCallBackend(CeedCalloc(1, &impl));
711   CeedCallSycl(ceed, impl->reduction_norm = sycl::malloc_host<CeedScalar>(1, data->sycl_context));
712   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasValidArray", CeedVectorHasValidArray_Sycl));
713   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "HasBorrowedArrayOfType", CeedVectorHasBorrowedArrayOfType_Sycl));
714   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Sycl));
715   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "TakeArray", CeedVectorTakeArray_Sycl));
716   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SetValue", CeedVectorSetValue_Sycl));
717   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "SyncArray", CeedVectorSyncArray_Sycl));
718   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArray", CeedVectorGetArray_Sycl));
719   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Sycl));
720   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "GetArrayWrite", CeedVectorGetArrayWrite_Sycl));
721   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Norm", CeedVectorNorm_Sycl));
722   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Reciprocal", CeedVectorReciprocal_Sycl));
723   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "AXPY", CeedVectorAXPY_Sycl));
724   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Scale", CeedVectorScale_Sycl));
725   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "PointwiseMult", CeedVectorPointwiseMult_Sycl));
726   CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Sycl));
727   CeedCallBackend(CeedDestroy(&ceed));
728   CeedCallBackend(CeedVectorSetData(vec, impl));
729   return CEED_ERROR_SUCCESS;
730 }
731