Lines Matching +full:- +full:e

1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other
2 // CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE
5 // SPDX-License-Identifier: BSD-2-Clause
11 #include <ceed/jit-tools.h>
16 #include "../sycl/ceed-sycl-compile.hpp"
17 #include "ceed-sycl-ref.hpp"
24 //------------------------------------------------------------------------------
25 // Restriction Kernel : L-vector -> E-vector, strided
26 //------------------------------------------------------------------------------
29 const CeedInt elem_size = impl->elem_size; in CeedElemRestrictionStridedNoTranspose_Sycl()
30 const CeedInt num_elem = impl->num_elem; in CeedElemRestrictionStridedNoTranspose_Sycl()
31 const CeedInt num_comp = impl->num_comp; in CeedElemRestrictionStridedNoTranspose_Sycl()
32 const CeedInt stride_nodes = impl->strides[0]; in CeedElemRestrictionStridedNoTranspose_Sycl()
33 const CeedInt stride_comp = impl->strides[1]; in CeedElemRestrictionStridedNoTranspose_Sycl()
34 const CeedInt stride_elem = impl->strides[2]; in CeedElemRestrictionStridedNoTranspose_Sycl()
37 std::vector<sycl::event> e; in CeedElemRestrictionStridedNoTranspose_Sycl() local
39 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()}; in CeedElemRestrictionStridedNoTranspose_Sycl()
40 sycl_queue.parallel_for<CeedElemRestrSyclStridedNT>(kernel_range, e, [=](sycl::id<1> node) { in CeedElemRestrictionStridedNoTranspose_Sycl()
51 //------------------------------------------------------------------------------
52 // Restriction Kernel : L-vector -> E-vector, offsets provided
53 //------------------------------------------------------------------------------
56 const CeedInt elem_size = impl->elem_size; in CeedElemRestrictionOffsetNoTranspose_Sycl()
57 const CeedInt num_elem = impl->num_elem; in CeedElemRestrictionOffsetNoTranspose_Sycl()
58 const CeedInt num_comp = impl->num_comp; in CeedElemRestrictionOffsetNoTranspose_Sycl()
59 const CeedInt comp_stride = impl->comp_stride; in CeedElemRestrictionOffsetNoTranspose_Sycl()
60 const CeedInt *indices = impl->d_offsets; in CeedElemRestrictionOffsetNoTranspose_Sycl()
64 std::vector<sycl::event> e; in CeedElemRestrictionOffsetNoTranspose_Sycl() local
66 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()}; in CeedElemRestrictionOffsetNoTranspose_Sycl()
67 sycl_queue.parallel_for<CeedElemRestrSyclOffsetNT>(kernel_range, e, [=](sycl::id<1> node) { in CeedElemRestrictionOffsetNoTranspose_Sycl()
79 //------------------------------------------------------------------------------
80 // Kernel: E-vector -> L-vector, strided
81 //------------------------------------------------------------------------------
84 const CeedInt elem_size = impl->elem_size; in CeedElemRestrictionStridedTranspose_Sycl()
85 const CeedInt num_elem = impl->num_elem; in CeedElemRestrictionStridedTranspose_Sycl()
86 const CeedInt num_comp = impl->num_comp; in CeedElemRestrictionStridedTranspose_Sycl()
87 const CeedInt stride_nodes = impl->strides[0]; in CeedElemRestrictionStridedTranspose_Sycl()
88 const CeedInt stride_comp = impl->strides[1]; in CeedElemRestrictionStridedTranspose_Sycl()
89 const CeedInt stride_elem = impl->strides[2]; in CeedElemRestrictionStridedTranspose_Sycl()
93 std::vector<sycl::event> e; in CeedElemRestrictionStridedTranspose_Sycl() local
95 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()}; in CeedElemRestrictionStridedTranspose_Sycl()
96 sycl_queue.parallel_for<CeedElemRestrSyclStridedT>(kernel_range, e, [=](sycl::id<1> node) { in CeedElemRestrictionStridedTranspose_Sycl()
107 //------------------------------------------------------------------------------
108 // Kernel: E-vector -> L-vector, offsets provided
109 //------------------------------------------------------------------------------
112 const CeedInt num_nodes = impl->num_nodes; in CeedElemRestrictionOffsetTranspose_Sycl()
113 const CeedInt elem_size = impl->elem_size; in CeedElemRestrictionOffsetTranspose_Sycl()
114 const CeedInt num_elem = impl->num_elem; in CeedElemRestrictionOffsetTranspose_Sycl()
115 const CeedInt num_comp = impl->num_comp; in CeedElemRestrictionOffsetTranspose_Sycl()
116 const CeedInt comp_stride = impl->comp_stride; in CeedElemRestrictionOffsetTranspose_Sycl()
117 const CeedInt *l_vec_indices = impl->d_l_vec_indices; in CeedElemRestrictionOffsetTranspose_Sycl()
118 const CeedInt *t_offsets = impl->d_t_offsets; in CeedElemRestrictionOffsetTranspose_Sycl()
119 const CeedInt *t_indices = impl->d_t_indices; in CeedElemRestrictionOffsetTranspose_Sycl()
123 std::vector<sycl::event> e; in CeedElemRestrictionOffsetTranspose_Sycl() local
125 if (!sycl_queue.is_in_order()) e = {sycl_queue.ext_oneapi_submit_barrier()}; in CeedElemRestrictionOffsetTranspose_Sycl()
126 sycl_queue.parallel_for<CeedElemRestrSyclOffsetT>(kernel_range, e, [=](sycl::id<1> id) { in CeedElemRestrictionOffsetTranspose_Sycl()
146 //------------------------------------------------------------------------------
148 //------------------------------------------------------------------------------
163 // Sum into for transpose mode, e-vec to l-vec in CeedElemRestrictionApply_Sycl()
166 // Overwrite for notranspose mode, l-vec to e-vec in CeedElemRestrictionApply_Sycl()
172 // L-vector -> E-vector in CeedElemRestrictionApply_Sycl()
173 if (impl->d_offsets) { in CeedElemRestrictionApply_Sycl()
174 // -- Offsets provided in CeedElemRestrictionApply_Sycl()
175 CeedCallBackend(CeedElemRestrictionOffsetNoTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl()
177 // -- Strided restriction in CeedElemRestrictionApply_Sycl()
178 CeedCallBackend(CeedElemRestrictionStridedNoTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl()
181 // E-vector -> L-vector in CeedElemRestrictionApply_Sycl()
182 if (impl->d_offsets) { in CeedElemRestrictionApply_Sycl()
183 // -- Offsets provided in CeedElemRestrictionApply_Sycl()
184 CeedCallBackend(CeedElemRestrictionOffsetTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl()
186 // -- Strided restriction in CeedElemRestrictionApply_Sycl()
187 CeedCallBackend(CeedElemRestrictionStridedTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl()
191 CeedCallSycl(ceed, data->sycl_queue.wait_and_throw()); in CeedElemRestrictionApply_Sycl()
202 //------------------------------------------------------------------------------
204 //------------------------------------------------------------------------------
212 *offsets = impl->h_offsets; in CeedElemRestrictionGetOffsets_Sycl()
215 *offsets = impl->d_offsets; in CeedElemRestrictionGetOffsets_Sycl()
221 //------------------------------------------------------------------------------
223 //------------------------------------------------------------------------------
234 CeedCallSycl(ceed, data->sycl_queue.wait_and_throw()); in CeedElemRestrictionDestroy_Sycl()
236 CeedCallBackend(CeedFree(&impl->h_offsets_owned)); in CeedElemRestrictionDestroy_Sycl()
237 CeedCallSycl(ceed, sycl::free(impl->d_offsets_owned, data->sycl_context)); in CeedElemRestrictionDestroy_Sycl()
238 CeedCallSycl(ceed, sycl::free(impl->d_t_offsets, data->sycl_context)); in CeedElemRestrictionDestroy_Sycl()
239 CeedCallSycl(ceed, sycl::free(impl->d_t_indices, data->sycl_context)); in CeedElemRestrictionDestroy_Sycl()
240 CeedCallSycl(ceed, sycl::free(impl->d_l_vec_indices, data->sycl_context)); in CeedElemRestrictionDestroy_Sycl()
246 //------------------------------------------------------------------------------
248 //------------------------------------------------------------------------------
270 impl->num_nodes = num_nodes; in CeedElemRestrictionOffset_Sycl()
272 // L-vector offsets array in CeedElemRestrictionOffset_Sycl()
289 for (CeedInt e = 0; e < num_elem; ++e) { in CeedElemRestrictionOffset_Sycl() local
290 …for (CeedInt i = 0; i < elem_size; ++i) ++t_offsets[ind_to_offset[indices[elem_size * e + i]] + 1]; in CeedElemRestrictionOffset_Sycl()
293 for (CeedInt i = 1; i < size_offsets; ++i) t_offsets[i] += t_offsets[i - 1]; in CeedElemRestrictionOffset_Sycl()
294 // List all E-vec indices associated with L-vec node in CeedElemRestrictionOffset_Sycl()
295 for (CeedInt e = 0; e < num_elem; ++e) { in CeedElemRestrictionOffset_Sycl() local
297 const CeedInt lid = elem_size * e + i; in CeedElemRestrictionOffset_Sycl()
303 for (int i = size_offsets - 1; i > 0; --i) t_offsets[i] = t_offsets[i - 1]; in CeedElemRestrictionOffset_Sycl()
309 std::vector<sycl::event> e; in CeedElemRestrictionOffset_Sycl() local
311 if (!data->sycl_queue.is_in_order()) e = {data->sycl_queue.ext_oneapi_submit_barrier()}; in CeedElemRestrictionOffset_Sycl()
313 // -- L-vector indices in CeedElemRestrictionOffset_Sycl()
314 …CeedCallSycl(ceed, impl->d_l_vec_indices = sycl::malloc_device<CeedInt>(num_nodes, data->sycl_devi… in CeedElemRestrictionOffset_Sycl()
315 …sycl::event copy_lvec = data->sycl_queue.copy<CeedInt>(l_vec_indices, impl->d_l_vec_indices, num_n… in CeedElemRestrictionOffset_Sycl()
316 // -- Transpose offsets in CeedElemRestrictionOffset_Sycl()
317 …CeedCallSycl(ceed, impl->d_t_offsets = sycl::malloc_device<CeedInt>(size_offsets, data->sycl_devic… in CeedElemRestrictionOffset_Sycl()
318 …sycl::event copy_offsets = data->sycl_queue.copy<CeedInt>(t_offsets, impl->d_t_offsets, size_offse… in CeedElemRestrictionOffset_Sycl()
319 // -- Transpose indices in CeedElemRestrictionOffset_Sycl()
320 …CeedCallSycl(ceed, impl->d_t_indices = sycl::malloc_device<CeedInt>(size_indices, data->sycl_devic… in CeedElemRestrictionOffset_Sycl()
321 …sycl::event copy_indices = data->sycl_queue.copy<CeedInt>(t_indices, impl->d_t_indices, size_indic… in CeedElemRestrictionOffset_Sycl()
335 //------------------------------------------------------------------------------
337 //------------------------------------------------------------------------------
373 impl->num_nodes = size; in CeedElemRestrictionCreate_Sycl()
374 impl->num_elem = num_elem; in CeedElemRestrictionCreate_Sycl()
375 impl->num_comp = num_comp; in CeedElemRestrictionCreate_Sycl()
376 impl->elem_size = elem_size; in CeedElemRestrictionCreate_Sycl()
377 impl->comp_stride = comp_stride; in CeedElemRestrictionCreate_Sycl()
378 impl->strides[0] = strides[0]; in CeedElemRestrictionCreate_Sycl()
379 impl->strides[1] = strides[1]; in CeedElemRestrictionCreate_Sycl()
380 impl->strides[2] = strides[2]; in CeedElemRestrictionCreate_Sycl()
403 CeedCallBackend(CeedMalloc(elem_size * num_elem, &impl->h_offsets_owned)); in CeedElemRestrictionCreate_Sycl()
404 memcpy(impl->h_offsets_owned, offsets, elem_size * num_elem * sizeof(CeedInt)); in CeedElemRestrictionCreate_Sycl()
405 impl->h_offsets_borrowed = NULL; in CeedElemRestrictionCreate_Sycl()
406 impl->h_offsets = impl->h_offsets_owned; in CeedElemRestrictionCreate_Sycl()
410 impl->h_offsets_owned = (CeedInt *)offsets; in CeedElemRestrictionCreate_Sycl()
411 impl->h_offsets_borrowed = NULL; in CeedElemRestrictionCreate_Sycl()
412 impl->h_offsets = impl->h_offsets_owned; in CeedElemRestrictionCreate_Sycl()
415 impl->h_offsets_owned = NULL; in CeedElemRestrictionCreate_Sycl()
416 impl->h_offsets_borrowed = (CeedInt *)offsets; in CeedElemRestrictionCreate_Sycl()
417 impl->h_offsets = impl->h_offsets_borrowed; in CeedElemRestrictionCreate_Sycl()
421 …CeedCallSycl(ceed, impl->d_offsets_owned = sycl::malloc_device<CeedInt>(size, data->sycl_device, d… in CeedElemRestrictionCreate_Sycl()
423 // -- Order queue in CeedElemRestrictionCreate_Sycl()
424 sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier(); in CeedElemRestrictionCreate_Sycl() local
425 …sycl::event copy_event = data->sycl_queue.copy<CeedInt>(impl->h_offsets, impl->d_offsets_owned, si… in CeedElemRestrictionCreate_Sycl()
426 // -- Wait for copy to finish and handle exceptions in CeedElemRestrictionCreate_Sycl()
428 impl->d_offsets = impl->d_offsets_owned; in CeedElemRestrictionCreate_Sycl()
436 …CeedCallSycl(ceed, impl->d_offsets_owned = sycl::malloc_device<CeedInt>(size, data->sycl_device, d… in CeedElemRestrictionCreate_Sycl()
438 // -- Order queue in CeedElemRestrictionCreate_Sycl()
439 sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier(); in CeedElemRestrictionCreate_Sycl() local
440 …sycl::event copy_event = data->sycl_queue.copy<CeedInt>(offsets, impl->d_offsets_owned, size, {e}); in CeedElemRestrictionCreate_Sycl()
441 // -- Wait for copy to finish and handle exceptions in CeedElemRestrictionCreate_Sycl()
443 impl->d_offsets = impl->d_offsets_owned; in CeedElemRestrictionCreate_Sycl()
447 impl->d_offsets_owned = (CeedInt *)offsets; in CeedElemRestrictionCreate_Sycl()
448 impl->d_offsets_borrowed = NULL; in CeedElemRestrictionCreate_Sycl()
449 impl->d_offsets = impl->d_offsets_owned; in CeedElemRestrictionCreate_Sycl()
452 impl->d_offsets_owned = NULL; in CeedElemRestrictionCreate_Sycl()
453 impl->d_offsets_borrowed = (CeedInt *)offsets; in CeedElemRestrictionCreate_Sycl()
454 impl->d_offsets = impl->d_offsets_borrowed; in CeedElemRestrictionCreate_Sycl()
457 CeedCallBackend(CeedMalloc(elem_size * num_elem, &impl->h_offsets_owned)); in CeedElemRestrictionCreate_Sycl()
459 // -- Order queue in CeedElemRestrictionCreate_Sycl()
460 sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier(); in CeedElemRestrictionCreate_Sycl() local
461 …sycl::event copy_event = data->sycl_queue.copy<CeedInt>(impl->d_offsets, impl->h_offsets_owned, el… in CeedElemRestrictionCreate_Sycl()
462 // -- Wait for copy to finish and handle exceptions in CeedElemRestrictionCreate_Sycl()
464 impl->h_offsets = impl->h_offsets_owned; in CeedElemRestrictionCreate_Sycl()