Lines Matching refs:sycl
28 using SpecID = sycl::specialization_id<CeedInt>;
39 static int CeedBasisApplyInterp_Sycl(sycl::queue &sycl_queue, const SyclModule_t &sycl_module, Ceed… in CeedBasisApplyInterp_Sycl()
45 const sycl::device &sycl_device = sycl_queue.get_device(); in CeedBasisApplyInterp_Sycl()
48 sycl::range<1> local_range(work_group_size); in CeedBasisApplyInterp_Sycl()
49 sycl::range<1> global_range(num_elem * work_group_size); in CeedBasisApplyInterp_Sycl()
50 sycl::nd_range<1> kernel_range(global_range, local_range); in CeedBasisApplyInterp_Sycl()
52 std::vector<sycl::event> e; in CeedBasisApplyInterp_Sycl()
56 sycl_queue.submit([&](sycl::handler &cgh) { in CeedBasisApplyInterp_Sycl()
60 sycl::local_accessor<CeedScalar> s_mem(op_len + 2 * buf_len, cgh); in CeedBasisApplyInterp_Sycl()
62 …el_for<CeedBasisSyclInterp<is_transpose>>(kernel_range, [=](sycl::nd_item<1> work_item, sycl::kern… in CeedBasisApplyInterp_Sycl()
82 sycl::group work_group = work_item.get_group(); in CeedBasisApplyInterp_Sycl()
87 CeedScalar *s_interp_1d = s_mem.get_multi_ptr<sycl::access::decorated::yes>().get(); in CeedBasisApplyInterp_Sycl()
111 work_item.barrier(sycl::access::fence_space::local_space); in CeedBasisApplyInterp_Sycl()
142 static int CeedBasisApplyGrad_Sycl(sycl::queue &sycl_queue, const SyclModule_t &sycl_module, CeedIn… in CeedBasisApplyGrad_Sycl()
149 const sycl::device &sycl_device = sycl_queue.get_device(); in CeedBasisApplyGrad_Sycl()
151 sycl::range<1> local_range(work_group_size); in CeedBasisApplyGrad_Sycl()
152 sycl::range<1> global_range(num_elem * work_group_size); in CeedBasisApplyGrad_Sycl()
153 sycl::nd_range<1> kernel_range(global_range, local_range); in CeedBasisApplyGrad_Sycl()
155 std::vector<sycl::event> e; in CeedBasisApplyGrad_Sycl()
159 sycl_queue.submit([&](sycl::handler &cgh) { in CeedBasisApplyGrad_Sycl()
163 sycl::local_accessor<CeedScalar> s_mem(2 * (op_len + buf_len), cgh); in CeedBasisApplyGrad_Sycl()
165 …llel_for<CeedBasisSyclGrad<is_transpose>>(kernel_range, [=](sycl::nd_item<1> work_item, sycl::kern… in CeedBasisApplyGrad_Sycl()
185 sycl::group work_group = work_item.get_group(); in CeedBasisApplyGrad_Sycl()
190 CeedScalar *s_interp_1d = s_mem.get_multi_ptr<sycl::access::decorated::yes>().get(); in CeedBasisApplyGrad_Sycl()
212 work_item.barrier(sycl::access::fence_space::local_space); in CeedBasisApplyGrad_Sycl()
245 static int CeedBasisApplyWeight_Sycl(sycl::queue &sycl_queue, CeedInt num_elem, const CeedBasis_Syc… in CeedBasisApplyWeight_Sycl()
253 sycl::range<3> kernel_range(num_elem * num_quad_z, num_quad_y, num_quad_x); in CeedBasisApplyWeight_Sycl()
255 std::vector<sycl::event> e; in CeedBasisApplyWeight_Sycl()
259 sycl_queue.parallel_for<CeedBasisSyclWeight>(kernel_range, e, [=](sycl::item<3> work_item) { in CeedBasisApplyWeight_Sycl()
292 std::vector<sycl::event> e; in CeedBasisApply_Sycl()
338 static int CeedBasisApplyNonTensorInterp_Sycl(sycl::queue &sycl_queue, CeedInt num_elem, CeedInt is… in CeedBasisApplyNonTensorInterp_Sycl()
353 sycl::range<2> kernel_range(num_elem, v_size); in CeedBasisApplyNonTensorInterp_Sycl()
355 std::vector<sycl::event> e; in CeedBasisApplyNonTensorInterp_Sycl()
359 sycl_queue.parallel_for<CeedBasisSyclInterpNT>(kernel_range, e, [=](sycl::id<2> indx) { in CeedBasisApplyNonTensorInterp_Sycl()
379 static int CeedBasisApplyNonTensorGrad_Sycl(sycl::queue &sycl_queue, CeedInt num_elem, CeedInt is_t… in CeedBasisApplyNonTensorGrad_Sycl()
399 sycl::range<2> kernel_range(num_elem, v_size); in CeedBasisApplyNonTensorGrad_Sycl()
401 std::vector<sycl::event> e; in CeedBasisApplyNonTensorGrad_Sycl()
405 sycl_queue.parallel_for<CeedBasisSyclGradNT>(kernel_range, e, [=](sycl::id<2> indx) { in CeedBasisApplyNonTensorGrad_Sycl()
435 static int CeedBasisApplyNonTensorWeight_Sycl(sycl::queue &sycl_queue, CeedInt num_elem, const Ceed… in CeedBasisApplyNonTensorWeight_Sycl()
439 sycl::range<2> kernel_range(num_elem, num_qpts); in CeedBasisApplyNonTensorWeight_Sycl()
441 std::vector<sycl::event> e; in CeedBasisApplyNonTensorWeight_Sycl()
445 sycl_queue.parallel_for<CeedBasisSyclWeightNT>(kernel_range, e, [=](sycl::id<2> indx) { in CeedBasisApplyNonTensorWeight_Sycl()
479 sycl::event e = data->sycl_queue.ext_oneapi_submit_barrier(); in CeedBasisApplyNonTensor_Sycl()
526 if (impl->d_q_weight_1d) CeedCallSycl(ceed, sycl::free(impl->d_q_weight_1d, data->sycl_context)); in CeedBasisDestroy_Sycl()
527 CeedCallSycl(ceed, sycl::free(impl->d_interp_1d, data->sycl_context)); in CeedBasisDestroy_Sycl()
528 CeedCallSycl(ceed, sycl::free(impl->d_grad_1d, data->sycl_context)); in CeedBasisDestroy_Sycl()
549 if (impl->d_q_weight) CeedCallSycl(ceed, sycl::free(impl->d_q_weight, data->sycl_context)); in CeedBasisDestroyNonTensor_Sycl()
550 CeedCallSycl(ceed, sycl::free(impl->d_interp, data->sycl_context)); in CeedBasisDestroyNonTensor_Sycl()
551 CeedCallSycl(ceed, sycl::free(impl->d_grad, data->sycl_context)); in CeedBasisDestroyNonTensor_Sycl()
586 std::vector<sycl::event> e; in CeedBasisCreateTensorH1_Sycl()
590 std::vector<sycl::event> copy_events; in CeedBasisCreateTensorH1_Sycl()
592 …CeedCallSycl(ceed, impl->d_q_weight_1d = sycl::malloc_device<CeedScalar>(Q_1d, data->sycl_device, … in CeedBasisCreateTensorH1_Sycl()
593 …sycl::event copy_weight = data->sycl_queue.copy<CeedScalar>(q_weight_1d, impl->d_q_weight_1d, Q_1d… in CeedBasisCreateTensorH1_Sycl()
598 …CeedCallSycl(ceed, impl->d_interp_1d = sycl::malloc_device<CeedScalar>(interp_length, data->sycl_d… in CeedBasisCreateTensorH1_Sycl()
599 …sycl::event copy_interp = data->sycl_queue.copy<CeedScalar>(interp_1d, impl->d_interp_1d, interp_l… in CeedBasisCreateTensorH1_Sycl()
602 …CeedCallSycl(ceed, impl->d_grad_1d = sycl::malloc_device<CeedScalar>(interp_length, data->sycl_dev… in CeedBasisCreateTensorH1_Sycl()
603 …sycl::event copy_grad = data->sycl_queue.copy<CeedScalar>(grad_1d, impl->d_grad_1d, interp_length,… in CeedBasisCreateTensorH1_Sycl()
606 CeedCallSycl(ceed, sycl::event::wait_and_throw(copy_events)); in CeedBasisCreateTensorH1_Sycl()
608 …std::vector<sycl::kernel_id> kernel_ids = {sycl::get_kernel_id<CeedBasisSyclInterp<1>>(), sycl::ge… in CeedBasisCreateTensorH1_Sycl()
609 … sycl::get_kernel_id<CeedBasisSyclGrad<1>>(), sycl::get_kernel_id<CeedBasisSyclGrad<0>>()}; in CeedBasisCreateTensorH1_Sycl()
611 …sycl::kernel_bundle<sycl::bundle_state::input> input_bundle = sycl::get_kernel_bundle<sycl::bundle… in CeedBasisCreateTensorH1_Sycl()
617 CeedCallSycl(ceed, impl->sycl_module = new SyclModule_t(sycl::build(input_bundle))); in CeedBasisCreateTensorH1_Sycl()
649 std::vector<sycl::event> e; in CeedBasisCreateH1_Sycl()
653 std::vector<sycl::event> copy_events; in CeedBasisCreateH1_Sycl()
655 …CeedCallSycl(ceed, impl->d_q_weight = sycl::malloc_device<CeedScalar>(num_qpts, data->sycl_device,… in CeedBasisCreateH1_Sycl()
656 …sycl::event copy_weight = data->sycl_queue.copy<CeedScalar>(q_weight, impl->d_q_weight, num_qpts, … in CeedBasisCreateH1_Sycl()
661 …CeedCallSycl(ceed, impl->d_interp = sycl::malloc_device<CeedScalar>(interp_length, data->sycl_devi… in CeedBasisCreateH1_Sycl()
662 …sycl::event copy_interp = data->sycl_queue.copy<CeedScalar>(interp, impl->d_interp, interp_length,… in CeedBasisCreateH1_Sycl()
666 …CeedCallSycl(ceed, impl->d_grad = sycl::malloc_device<CeedScalar>(grad_length, data->sycl_device, … in CeedBasisCreateH1_Sycl()
667 sycl::event copy_grad = data->sycl_queue.copy<CeedScalar>(grad, impl->d_grad, grad_length, e); in CeedBasisCreateH1_Sycl()
670 CeedCallSycl(ceed, sycl::event::wait_and_throw(copy_events)); in CeedBasisCreateH1_Sycl()