xref: /libCEED/rust/libceed-sys/c-src/backends/sycl-ref/ceed-sycl-restriction.sycl.cpp (revision 20a16a5fde8c37c2820b3798c7dd8f97d46128bb)
1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other
2 // CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE
3 // files for details.
4 //
5 // SPDX-License-Identifier: BSD-2-Clause
6 //
7 // This file is part of CEED:  http://github.com/ceed
8 
9 #include <ceed/backend.h>
10 #include <ceed/ceed.h>
11 #include <ceed/jit-tools.h>
12 
13 #include <string>
14 #include <sycl/sycl.hpp>
15 
16 #include "../sycl/ceed-sycl-compile.hpp"
17 #include "ceed-sycl-ref.hpp"
18 
19 class CeedElemRestrSyclStridedNT;
20 class CeedElemRestrSyclOffsetNT;
21 class CeedElemRestrSyclStridedT;
22 class CeedElemRestrSyclOffsetT;
23 
24 //------------------------------------------------------------------------------
25 // Restriction Kernel : L-vector -> E-vector, strided
26 //------------------------------------------------------------------------------
27 static int CeedElemRestrictionStridedNoTranspose_Sycl(sycl::queue &sycl_queue, const CeedElemRestriction_Sycl *impl, const CeedScalar *u,
28                                                       CeedScalar *v) {
29   const CeedInt  elem_size    = impl->elem_size;
30   const CeedInt  num_elem     = impl->num_elem;
31   const CeedInt  num_comp     = impl->num_comp;
32   const CeedInt  stride_nodes = impl->strides[0];
33   const CeedInt  stride_comp  = impl->strides[1];
34   const CeedInt  stride_elem  = impl->strides[2];
35   sycl::range<1> kernel_range(num_elem * elem_size);
36 
37   std::vector<sycl::event> e;
38 
39   if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
40   sycl_queue.parallel_for<CeedElemRestrSyclStridedNT>(kernel_range, e, [=](sycl::id<1> node) {
41     const CeedInt loc_node = node % elem_size;
42     const CeedInt elem     = node / elem_size;
43 
44     for (CeedInt comp = 0; comp < num_comp; comp++) {
45       v[loc_node + comp * elem_size * num_elem + elem * elem_size] = u[loc_node * stride_nodes + comp * stride_comp + elem * stride_elem];
46     }
47   });
48   return CEED_ERROR_SUCCESS;
49 }
50 
51 //------------------------------------------------------------------------------
52 // Restriction Kernel : L-vector -> E-vector, offsets provided
53 //------------------------------------------------------------------------------
54 static int CeedElemRestrictionOffsetNoTranspose_Sycl(sycl::queue &sycl_queue, const CeedElemRestriction_Sycl *impl, const CeedScalar *u,
55                                                      CeedScalar *v) {
56   const CeedInt  elem_size   = impl->elem_size;
57   const CeedInt  num_elem    = impl->num_elem;
58   const CeedInt  num_comp    = impl->num_comp;
59   const CeedInt  comp_stride = impl->comp_stride;
60   const CeedInt *indices     = impl->d_offsets;
61 
62   sycl::range<1> kernel_range(num_elem * elem_size);
63 
64   std::vector<sycl::event> e;
65 
66   if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
67   sycl_queue.parallel_for<CeedElemRestrSyclOffsetNT>(kernel_range, e, [=](sycl::id<1> node) {
68     const CeedInt ind      = indices[node];
69     const CeedInt loc_node = node % elem_size;
70     const CeedInt elem     = node / elem_size;
71 
72     for (CeedInt comp = 0; comp < num_comp; comp++) {
73       v[loc_node + comp * elem_size * num_elem + elem * elem_size] = u[ind + comp * comp_stride];
74     }
75   });
76   return CEED_ERROR_SUCCESS;
77 }
78 
79 //------------------------------------------------------------------------------
80 // Kernel: E-vector -> L-vector, strided
81 //------------------------------------------------------------------------------
82 static int CeedElemRestrictionStridedTranspose_Sycl(sycl::queue &sycl_queue, const CeedElemRestriction_Sycl *impl, const CeedScalar *u,
83                                                     CeedScalar *v) {
84   const CeedInt elem_size    = impl->elem_size;
85   const CeedInt num_elem     = impl->num_elem;
86   const CeedInt num_comp     = impl->num_comp;
87   const CeedInt stride_nodes = impl->strides[0];
88   const CeedInt stride_comp  = impl->strides[1];
89   const CeedInt stride_elem  = impl->strides[2];
90 
91   sycl::range<1> kernel_range(num_elem * elem_size);
92 
93   std::vector<sycl::event> e;
94 
95   if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
96   sycl_queue.parallel_for<CeedElemRestrSyclStridedT>(kernel_range, e, [=](sycl::id<1> node) {
97     const CeedInt loc_node = node % elem_size;
98     const CeedInt elem     = node / elem_size;
99 
100     for (CeedInt comp = 0; comp < num_comp; comp++) {
101       v[loc_node * stride_nodes + comp * stride_comp + elem * stride_elem] += u[loc_node + comp * elem_size * num_elem + elem * elem_size];
102     }
103   });
104   return CEED_ERROR_SUCCESS;
105 }
106 
107 //------------------------------------------------------------------------------
108 // Kernel: E-vector -> L-vector, offsets provided
109 //------------------------------------------------------------------------------
110 static int CeedElemRestrictionOffsetTranspose_Sycl(sycl::queue &sycl_queue, const CeedElemRestriction_Sycl *impl, const CeedScalar *u,
111                                                    CeedScalar *v) {
112   const CeedInt  num_nodes     = impl->num_nodes;
113   const CeedInt  elem_size     = impl->elem_size;
114   const CeedInt  num_elem      = impl->num_elem;
115   const CeedInt  num_comp      = impl->num_comp;
116   const CeedInt  comp_stride   = impl->comp_stride;
117   const CeedInt *l_vec_indices = impl->d_l_vec_indices;
118   const CeedInt *t_offsets     = impl->d_t_offsets;
119   const CeedInt *t_indices     = impl->d_t_indices;
120 
121   sycl::range<1> kernel_range(num_nodes * num_comp);
122 
123   std::vector<sycl::event> e;
124 
125   if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()};
126   sycl_queue.parallel_for<CeedElemRestrSyclOffsetT>(kernel_range, e, [=](sycl::id<1> id) {
127     const CeedInt node    = id % num_nodes;
128     const CeedInt comp    = id / num_nodes;
129     const CeedInt ind     = l_vec_indices[node];
130     const CeedInt range_1 = t_offsets[node];
131     const CeedInt range_N = t_offsets[node + 1];
132     CeedScalar    value   = 0.0;
133 
134     for (CeedInt j = range_1; j < range_N; j++) {
135       const CeedInt t_ind    = t_indices[j];
136       CeedInt       loc_node = t_ind % elem_size;
137       CeedInt       elem     = t_ind / elem_size;
138 
139       value += u[loc_node + comp * elem_size * num_elem + elem * elem_size];
140     }
141     v[ind + comp * comp_stride] += value;
142   });
143   return CEED_ERROR_SUCCESS;
144 }
145 
146 //------------------------------------------------------------------------------
147 // Apply restriction
148 //------------------------------------------------------------------------------
149 static int CeedElemRestrictionApply_Sycl(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
150   Ceed                      ceed;
151   Ceed_Sycl                *data;
152   const CeedScalar         *d_u;
153   CeedScalar               *d_v;
154   CeedElemRestriction_Sycl *impl;
155 
156   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
157   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
158   CeedCallBackend(CeedGetData(ceed, &data));
159 
160   // Get vectors
161   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
162   if (t_mode == CEED_TRANSPOSE) {
163     // Sum into for transpose mode, e-vec to l-vec
164     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
165   } else {
166     // Overwrite for notranspose mode, l-vec to e-vec
167     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
168   }
169 
170   // Restrict
171   if (t_mode == CEED_NOTRANSPOSE) {
172     // L-vector -> E-vector
173     if (impl->d_offsets) {
174       // -- Offsets provided
175       CeedCallBackend(CeedElemRestrictionOffsetNoTranspose_Sycl(data->sycl_queue, impl, d_u, d_v));
176     } else {
177       // -- Strided restriction
178       CeedCallBackend(CeedElemRestrictionStridedNoTranspose_Sycl(data->sycl_queue, impl, d_u, d_v));
179     }
180   } else {
181     // E-vector -> L-vector
182     if (impl->d_offsets) {
183       // -- Offsets provided
184       CeedCallBackend(CeedElemRestrictionOffsetTranspose_Sycl(data->sycl_queue, impl, d_u, d_v));
185     } else {
186       // -- Strided restriction
187       CeedCallBackend(CeedElemRestrictionStridedTranspose_Sycl(data->sycl_queue, impl, d_u, d_v));
188     }
189   }
190   // Wait for queues to be completed. NOTE: This may not be necessary
191   CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());
192 
193   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
194 
195   // Restore arrays
196   CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
197   CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
198   CeedCallBackend(CeedDestroy(&ceed));
199   return CEED_ERROR_SUCCESS;
200 }
201 
202 //------------------------------------------------------------------------------
203 // Get offsets
204 //------------------------------------------------------------------------------
205 static int CeedElemRestrictionGetOffsets_Sycl(CeedElemRestriction rstr, CeedMemType m_type, const CeedInt **offsets) {
206   CeedElemRestriction_Sycl *impl;
207 
208   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
209 
210   switch (m_type) {
211     case CEED_MEM_HOST:
212       *offsets = impl->h_offsets;
213       break;
214     case CEED_MEM_DEVICE:
215       *offsets = impl->d_offsets;
216       break;
217   }
218   return CEED_ERROR_SUCCESS;
219 }
220 
//------------------------------------------------------------------------------
// Destroy restriction
//------------------------------------------------------------------------------
static int CeedElemRestrictionDestroy_Sycl(CeedElemRestriction rstr) {
  Ceed                      ceed;
  Ceed_Sycl                *data;
  CeedElemRestriction_Sycl *impl;

  CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
  CeedCallBackend(CeedGetData(ceed, &data));

  // Wait for all work to finish before freeing memory
  CeedCallSycl(ceed, data->sycl_queue.wait_and_throw());

  // Host-side offsets copy (only non-NULL when this restriction owns one)
  CeedCallBackend(CeedFree(&impl->h_offsets_owned));
  // Device-side USM allocations; some of these pointers may be NULL (e.g. for
  // strided restrictions the transpose-map arrays are never allocated) —
  // presumably sycl::free(NULL, ctx) is a no-op per the SYCL spec
  CeedCallSycl(ceed, sycl::free(impl->d_offsets_owned, data->sycl_context));
  CeedCallSycl(ceed, sycl::free(impl->d_t_offsets, data->sycl_context));
  CeedCallSycl(ceed, sycl::free(impl->d_t_indices, data->sycl_context));
  CeedCallSycl(ceed, sycl::free(impl->d_l_vec_indices, data->sycl_context));
  // Note: borrowed pointers (h_offsets_borrowed / d_offsets_borrowed) are
  // intentionally not freed — their owners free them
  CeedCallBackend(CeedFree(&impl));
  CeedCallBackend(CeedDestroy(&ceed));
  return CEED_ERROR_SUCCESS;
}
245 
//------------------------------------------------------------------------------
// Create transpose offsets and indices
//
// Builds a CSR-like map from L-vector nodes to the E-vector entries that
// reference them, so the transpose (E->L) kernel can gather without atomics:
//   - l_vec_indices[k] : L-vector index of the k-th active node
//   - t_offsets[k]     : start of node k's entry list (t_offsets has
//                        num_nodes + 1 entries; last entry is the total count)
//   - t_indices[j]     : E-vector index (elem * elem_size + loc_node)
// The three arrays are copied to device memory on impl.
//------------------------------------------------------------------------------
static int CeedElemRestrictionOffset_Sycl(const CeedElemRestriction rstr, const CeedInt *indices) {
  Ceed                      ceed;
  Ceed_Sycl                *data;
  bool                     *is_node;
  CeedSize                  l_size;
  CeedInt                   num_elem, elem_size, num_comp, num_nodes = 0, *ind_to_offset, *l_vec_indices, *t_offsets, *t_indices;
  CeedElemRestriction_Sycl *impl;

  CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
  CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
  CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
  CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
  CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));

  // Count num_nodes: mark every L-vector index that appears in the offsets,
  // then count the distinct marks
  CeedCallBackend(CeedCalloc(l_size, &is_node));
  const CeedInt size_indices = num_elem * elem_size;

  for (CeedInt i = 0; i < size_indices; i++) is_node[indices[i]] = 1;
  for (CeedInt i = 0; i < l_size; i++) num_nodes += is_node[i];
  impl->num_nodes = num_nodes;

  // L-vector offsets array: compress the sparse set of active L-vector indices
  // into [0, num_nodes) and keep the inverse map for lookups below
  CeedCallBackend(CeedCalloc(l_size, &ind_to_offset));
  CeedCallBackend(CeedCalloc(num_nodes, &l_vec_indices));
  for (CeedInt i = 0, j = 0; i < l_size; i++) {
    if (is_node[i]) {
      l_vec_indices[j] = i;  // compressed slot j -> L-vector index i
      ind_to_offset[i] = j++;  // L-vector index i -> compressed slot j
    }
  }
  CeedCallBackend(CeedFree(&is_node));

  // Compute transpose offsets and indices
  const CeedInt size_offsets = num_nodes + 1;

  CeedCallBackend(CeedCalloc(size_offsets, &t_offsets));
  CeedCallBackend(CeedMalloc(size_indices, &t_indices));
  // Count node multiplicity (stored shifted by one so the running sum below
  // yields row starts)
  for (CeedInt e = 0; e < num_elem; ++e) {
    for (CeedInt i = 0; i < elem_size; ++i) ++t_offsets[ind_to_offset[indices[elem_size * e + i]] + 1];
  }
  // Convert to running sum
  for (CeedInt i = 1; i < size_offsets; ++i) t_offsets[i] += t_offsets[i - 1];
  // List all E-vec indices associated with L-vec node; each insertion advances
  // t_offsets[row], so afterwards every entry has slid one row forward
  for (CeedInt e = 0; e < num_elem; ++e) {
    for (CeedInt i = 0; i < elem_size; ++i) {
      const CeedInt lid                          = elem_size * e + i;
      const CeedInt gid                          = indices[lid];
      t_indices[t_offsets[ind_to_offset[gid]]++] = lid;
    }
  }
  // Reset running sum: shift rows back down and restore t_offsets[0] = 0
  for (int i = size_offsets - 1; i > 0; --i) t_offsets[i] = t_offsets[i - 1];
  t_offsets[0] = 0;

  // Copy data to device
  CeedCallBackend(CeedGetData(ceed, &data));

  std::vector<sycl::event> e;

  // Out-of-order queues need an explicit barrier to order against prior submissions
  if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()};

  // -- L-vector indices
  CeedCallSycl(ceed, impl->d_l_vec_indices = sycl::malloc_device<CeedInt>(num_nodes, data->sycl_device, data->sycl_context));
  sycl::event copy_lvec = data->sycl_queue.copy<CeedInt>(l_vec_indices, impl->d_l_vec_indices, num_nodes, e);
  // -- Transpose offsets
  CeedCallSycl(ceed, impl->d_t_offsets = sycl::malloc_device<CeedInt>(size_offsets, data->sycl_device, data->sycl_context));
  sycl::event copy_offsets = data->sycl_queue.copy<CeedInt>(t_offsets, impl->d_t_offsets, size_offsets, e);
  // -- Transpose indices
  CeedCallSycl(ceed, impl->d_t_indices = sycl::malloc_device<CeedInt>(size_indices, data->sycl_device, data->sycl_context));
  sycl::event copy_indices = data->sycl_queue.copy<CeedInt>(t_indices, impl->d_t_indices, size_indices, e);

  // Wait for all copies to complete and handle exceptions
  CeedCallSycl(ceed, sycl::event::wait_and_throw({copy_lvec, copy_offsets, copy_indices}));

  // Cleanup: host staging arrays are no longer needed once copies complete
  CeedCallBackend(CeedFree(&ind_to_offset));
  CeedCallBackend(CeedFree(&l_vec_indices));
  CeedCallBackend(CeedFree(&t_offsets));
  CeedCallBackend(CeedFree(&t_indices));
  CeedCallBackend(CeedDestroy(&ceed));
  return CEED_ERROR_SUCCESS;
}
334 
//------------------------------------------------------------------------------
// Create restriction
//
// Populates the backend data for a new element restriction: records sizes and
// strides, mirrors the offsets array to both host and device memory (per
// mem_type/copy_mode), builds the transpose map, and registers the backend
// functions. The orients/curl_orients arguments are unused — oriented and
// curl-oriented restrictions are rejected below.
//------------------------------------------------------------------------------
int CeedElemRestrictionCreate_Sycl(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
                                   const CeedInt8 *curl_orients, CeedElemRestriction rstr) {
  Ceed                      ceed;
  Ceed_Sycl                *data;
  bool                      is_strided;
  CeedInt                   num_elem, num_comp, elem_size, comp_stride = 1;
  CeedRestrictionType       rstr_type;
  CeedElemRestriction_Sycl *impl;

  CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
  CeedCallBackend(CeedGetData(ceed, &data));
  CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
  CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
  CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
  // Total E-vector entries per component; also the default backend strides
  const CeedInt size       = num_elem * elem_size;
  CeedInt       strides[3] = {1, size, elem_size};

  CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
  CeedCheck(rstr_type != CEED_RESTRICTION_ORIENTED && rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_BACKEND,
            "Backend does not implement CeedElemRestrictionCreateOriented or CeedElemRestrictionCreateCurlOriented");

  // Stride data: user-supplied strides override the backend defaults; the
  // component stride only applies to non-strided (offsets) restrictions
  CeedCallBackend(CeedElemRestrictionIsStrided(rstr, &is_strided));
  if (is_strided) {
    bool has_backend_strides;

    CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
    if (!has_backend_strides) {
      CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides));
    }
  } else {
    CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
  }

  // Allocate and fill the backend data (zero-initialized, so all pointer
  // members start as NULL)
  CeedCallBackend(CeedCalloc(1, &impl));
  impl->num_nodes   = size;  // refined later by CeedElemRestrictionOffset_Sycl when offsets exist
  impl->num_elem    = num_elem;
  impl->num_comp    = num_comp;
  impl->elem_size   = elem_size;
  impl->comp_stride = comp_stride;
  impl->strides[0]  = strides[0];
  impl->strides[1]  = strides[1];
  impl->strides[2]  = strides[2];
  CeedCallBackend(CeedElemRestrictionSetData(rstr, impl));

  // Set layouts: the E-vector layout is always the backend layout; the
  // L-vector layout matches only for strided restrictions using backend strides
  {
    bool    has_backend_strides;
    CeedInt layout[3] = {1, size, elem_size};

    CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout));
    if (rstr_type == CEED_RESTRICTION_STRIDED) {
      CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
      if (has_backend_strides) {
        CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout));
      }
    }
  }

  // Set up device indices/offset arrays: keep both a host and a device copy of
  // the offsets regardless of where they were supplied
  switch (mem_type) {
    case CEED_MEM_HOST: {
      // Host copy per copy_mode (offsets == NULL means a strided restriction)
      switch (copy_mode) {
        case CEED_COPY_VALUES:
          if (offsets != NULL) {
            CeedCallBackend(CeedMalloc(elem_size * num_elem, &impl->h_offsets_owned));
            memcpy(impl->h_offsets_owned, offsets, elem_size * num_elem * sizeof(CeedInt));
            impl->h_offsets_borrowed = NULL;
            impl->h_offsets          = impl->h_offsets_owned;
          }
          break;
        case CEED_OWN_POINTER:
          impl->h_offsets_owned    = (CeedInt *)offsets;
          impl->h_offsets_borrowed = NULL;
          impl->h_offsets          = impl->h_offsets_owned;
          break;
        case CEED_USE_POINTER:
          impl->h_offsets_owned    = NULL;
          impl->h_offsets_borrowed = (CeedInt *)offsets;
          impl->h_offsets          = impl->h_offsets_borrowed;
          break;
      }
      // Mirror to device and build the transpose map
      if (offsets != NULL) {
        CeedCallSycl(ceed, impl->d_offsets_owned = sycl::malloc_device<CeedInt>(size, data->sycl_device, data->sycl_context));
        // Copy from host to device
        // -- Order queue
        sycl::event e          = data->sycl_queue.ext_oneapi_submit_barrier();
        sycl::event copy_event = data->sycl_queue.copy<CeedInt>(impl->h_offsets, impl->d_offsets_owned, size, {e});
        // -- Wait for copy to finish and handle exceptions
        CeedCallSycl(ceed, copy_event.wait_and_throw());
        impl->d_offsets = impl->d_offsets_owned;
        CeedCallBackend(CeedElemRestrictionOffset_Sycl(rstr, offsets));
      }
    } break;
    case CEED_MEM_DEVICE: {
      // Device copy per copy_mode
      switch (copy_mode) {
        case CEED_COPY_VALUES:
          if (offsets != NULL) {
            CeedCallSycl(ceed, impl->d_offsets_owned = sycl::malloc_device<CeedInt>(size, data->sycl_device, data->sycl_context));
            // Copy from device to device
            // -- Order queue
            sycl::event e          = data->sycl_queue.ext_oneapi_submit_barrier();
            sycl::event copy_event = data->sycl_queue.copy<CeedInt>(offsets, impl->d_offsets_owned, size, {e});
            // -- Wait for copy to finish and handle exceptions
            CeedCallSycl(ceed, copy_event.wait_and_throw());
            impl->d_offsets = impl->d_offsets_owned;
          }
          break;
        case CEED_OWN_POINTER:
          impl->d_offsets_owned    = (CeedInt *)offsets;
          impl->d_offsets_borrowed = NULL;
          impl->d_offsets          = impl->d_offsets_owned;
          break;
        case CEED_USE_POINTER:
          // Last case of this switch; no break needed
          impl->d_offsets_owned    = NULL;
          impl->d_offsets_borrowed = (CeedInt *)offsets;
          impl->d_offsets          = impl->d_offsets_borrowed;
      }
      // Mirror to host and build the transpose map
      // NOTE(review): CeedElemRestrictionOffset_Sycl is passed the DEVICE
      // pointer `offsets` here and reads it on the host — presumably this
      // relies on USM device pointers being host-accessible, or impl->h_offsets
      // was intended instead; verify against the sycl-ref memory model
      if (offsets != NULL) {
        CeedCallBackend(CeedMalloc(elem_size * num_elem, &impl->h_offsets_owned));
        // Copy from device to host
        // -- Order queue
        sycl::event e          = data->sycl_queue.ext_oneapi_submit_barrier();
        sycl::event copy_event = data->sycl_queue.copy<CeedInt>(impl->d_offsets, impl->h_offsets_owned, elem_size * num_elem, {e});
        // -- Wait for copy to finish and handle exceptions
        CeedCallSycl(ceed, copy_event.wait_and_throw());
        impl->h_offsets = impl->h_offsets_owned;
        CeedCallBackend(CeedElemRestrictionOffset_Sycl(rstr, offsets));
      }
    }
  }

  // Register backend functions
  CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Sycl));
  CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApply_Sycl));
  CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApply_Sycl));
  CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Sycl));
  CeedCallBackend(CeedSetBackendFunctionCpp(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Sycl));
  CeedCallBackend(CeedDestroy(&ceed));
  return CEED_ERROR_SUCCESS;
}
479