| /libCEED/backends/sycl/ |
| H A D | ceed-sycl-compile.sycl.cpp | 71 static inline int CeedJitCompileSource_Sycl(Ceed ceed, const sycl::device &sycl_device, const std::… in CeedJitCompileSource_Sycl() 73 …sycl::ext::libceed::online_compiler<sycl::ext::libceed::source_language::opencl_c> compiler(sycl_d… 77 } catch (sycl::ext::libceed::online_compile_error &e) { 86 static int CeedLoadModule_Sycl(Ceed ceed, const sycl::context &sycl_context, const sycl::device &sy… in CeedLoadModule_Sycl() 88 auto lz_context = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(sycl_context); in CeedLoadModule_Sycl() 89 auto lz_device = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(sycl_device); in CeedLoadModule_Sycl() 116 …*sycl_module = new SyclModule_t(sycl::make_kernel_bundle<sycl::backend::ext_oneapi_level_zero, syc… in CeedLoadModule_Sycl() 117 {lz_module, sycl::ext::oneapi::level_zero::ownership::transfer}, sycl_context)); in CeedLoadModule_Sycl() 143 …ed ceed, const SyclModule_t *sycl_module, const std::string &kernel_name, sycl::kernel **sycl_kern… in CeedGetKernel_Sycl() 150 …ze_module_handle_t lz_module = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(*sycl_module… in CeedGetKernel_Sycl() [all …]
|
| H A D | ceed-sycl-common.sycl.cpp | 23 sycl::info::device_type device_type; in CeedInit_Sycl() 25 device_type = sycl::info::device_type::gpu; in CeedInit_Sycl() 27 device_type = sycl::info::device_type::cpu; in CeedInit_Sycl() 32 auto sycl_devices = sycl::device::get_devices(device_type); in CeedInit_Sycl() 44 sycl::device sycl_device{sycl_devices[device_id]}; in CeedInit_Sycl() 46 if (!sycl_device.has(sycl::aspect::usm_device_allocations)) { in CeedInit_Sycl() 53 sycl::async_handler sycl_async_handler = [&](sycl::exception_list exceptionList) { in CeedInit_Sycl() 57 } catch (sycl::exception const &e) { in CeedInit_Sycl() 67 sycl::context sycl_context{sycl_device.get_platform().get_devices()}; in CeedInit_Sycl() 68 …sycl::queue sycl_queue{sycl_context, sycl_device, sycl_async_handler, sycl::property::queue::in_… in CeedInit_Sycl() [all …]
|
| H A D | online_compiler.hpp | 15 namespace sycl { namespace 63 class online_compile_error : public sycl::exception { 66 …online_compile_error(const std::string &Msg) : sycl::exception(make_error_code(errc::invalid), Msg… in online_compile_error() 84 DeviceType(sycl::info::device_type::all), 94 …online_compiler(sycl::info::device_type dev_type, device_arch arch, compiled_code_format fmt = com… in online_compiler() 101 online_compiler(const sycl::device &) in online_compiler() 104 DeviceType(sycl::info::device_type::all), 132 online_compiler<Lang> &setTargetDeviceType(sycl::info::device_type type) { in setTargetDeviceType() 170 sycl::info::device_type DeviceType;
|
| H A D | ceed-sycl-compile.hpp | 15 using SyclModule_t = sycl::kernel_bundle<sycl::bundle_state::executable>; 19 …ed ceed, const SyclModule_t *sycl_module, const std::string &kernel_name, sycl::kernel **sycl_kern… 21 CEED_INTERN int CeedRunKernelDimSharedSycl(Ceed ceed, sycl::kernel *kernel, const int grid_size, co…
|
| H A D | ceed-sycl-common.hpp | 19 } catch (sycl::exception const &e) { \ 35 sycl::context sycl_context; 36 sycl::device sycl_device; 37 sycl::queue sycl_queue;
|
| H A D | online_compiler.sycl.cpp | 16 namespace sycl { namespace 40 static std::vector<const char *> prepareOclocArgs(sycl::info::device_type DeviceType, device_arch D… in prepareOclocArgs() 44 if (DeviceType == sycl::info::device_type::gpu) { in prepareOclocArgs() 100 static std::vector<byte> compileToSPIRV(const std::string &Source, sycl::info::device_type DeviceTy… in compileToSPIRV()
|
| /libCEED/backends/sycl-ref/ |
| H A D | ceed-sycl-ref-basis.sycl.cpp | 28 using SpecID = sycl::specialization_id<CeedInt>; 39 static int CeedBasisApplyInterp_Sycl(sycl::queue &sycl_queue, const SyclModule_t &sycl_module, Ceed… in CeedBasisApplyInterp_Sycl() 45 const sycl::device &sycl_device = sycl_queue.get_device(); in CeedBasisApplyInterp_Sycl() 48 sycl::range<1> local_range(work_group_size); in CeedBasisApplyInterp_Sycl() 49 sycl::range<1> global_range(num_elem * work_group_size); in CeedBasisApplyInterp_Sycl() 50 sycl::nd_range<1> kernel_range(global_range, local_range); in CeedBasisApplyInterp_Sycl() 52 std::vector<sycl::event> e; in CeedBasisApplyInterp_Sycl() 56 sycl_queue.submit([&](sycl::handler &cgh) { in CeedBasisApplyInterp_Sycl() 60 sycl::local_accessor<CeedScalar> s_mem(op_len + 2 * buf_len, cgh); in CeedBasisApplyInterp_Sycl() 62 …el_for<CeedBasisSyclInterp<is_transpose>>(kernel_range, [=](sycl::nd_item<1> work_item, sycl::kern… in CeedBasisApplyInterp_Sycl() [all …]
|
| H A D | ceed-sycl-restriction.sycl.cpp | 27 static int CeedElemRestrictionStridedNoTranspose_Sycl(sycl::queue &sycl_queue, const CeedElemRestri… in CeedElemRestrictionStridedNoTranspose_Sycl() 35 sycl::range<1> kernel_range(num_elem * elem_size); in CeedElemRestrictionStridedNoTranspose_Sycl() 37 std::vector<sycl::event> e; in CeedElemRestrictionStridedNoTranspose_Sycl() 40 sycl_queue.parallel_for<CeedElemRestrSyclStridedNT>(kernel_range, e, [=](sycl::id<1> node) { in CeedElemRestrictionStridedNoTranspose_Sycl() 54 static int CeedElemRestrictionOffsetNoTranspose_Sycl(sycl::queue &sycl_queue, const CeedElemRestric… in CeedElemRestrictionOffsetNoTranspose_Sycl() 62 sycl::range<1> kernel_range(num_elem * elem_size); in CeedElemRestrictionOffsetNoTranspose_Sycl() 64 std::vector<sycl::event> e; in CeedElemRestrictionOffsetNoTranspose_Sycl() 67 sycl_queue.parallel_for<CeedElemRestrSyclOffsetNT>(kernel_range, e, [=](sycl::id<1> node) { in CeedElemRestrictionOffsetNoTranspose_Sycl() 82 static int CeedElemRestrictionStridedTranspose_Sycl(sycl::queue &sycl_queue, const CeedElemRestrict… in CeedElemRestrictionStridedTranspose_Sycl() 91 sycl::range<1> kernel_range(num_elem * elem_size); in CeedElemRestrictionStridedTranspose_Sycl() [all …]
|
| H A D | ceed-sycl-vector.sycl.cpp | 58 …CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device… in CeedVectorSyncH2D_Sycl() 63 std::vector<sycl::event> e; in CeedVectorSyncH2D_Sycl() 97 std::vector<sycl::event> e; in CeedVectorSyncD2H_Sycl() 213 std::vector<sycl::event> e; in CeedVectorSetArrayDevice_Sycl() 220 …CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device<CeedScalar>(length, data->sycl_device… in CeedVectorSetArrayDevice_Sycl() 233 CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context)); in CeedVectorSetArrayDevice_Sycl() 243 CeedCallSycl(ceed, sycl::free(impl->d_array_owned, data->sycl_context)); in CeedVectorSetArrayDevice_Sycl() 284 static int CeedDeviceSetValue_Sycl(sycl::queue &sycl_queue, CeedScalar *d_array, CeedSize length, C… in CeedDeviceSetValue_Sycl() 285 std::vector<sycl::event> e; in CeedDeviceSetValue_Sycl() 465 std::vector<sycl::event> e; in CeedVectorNorm_Sycl() [all …]
|
| H A D | ceed-sycl-ref-qfunctioncontext.sycl.cpp | 37 …CeedCallSycl(ceed, impl->d_data_owned = sycl::malloc_device(ctx_size, sycl_data->sycl_device, sycl… in CeedQFunctionContextSyncH2D_Sycl() 40 std::vector<sycl::event> e; in CeedQFunctionContextSyncH2D_Sycl() 43 sycl::event copy_event = sycl_data->sycl_queue.memcpy(impl->d_data, impl->h_data, ctx_size, e); in CeedQFunctionContextSyncH2D_Sycl() 74 std::vector<sycl::event> e; in CeedQFunctionContextSyncD2H_Sycl() 77 sycl::event copy_event = sycl_data->sycl_queue.memcpy(impl->h_data, impl->d_data, ctx_size, e); in CeedQFunctionContextSyncD2H_Sycl() 201 std::vector<sycl::event> e; in CeedQFunctionContextSetDataDevice_Sycl() 208 CeedCallSycl(ceed, sycl::free(impl->d_data_owned, sycl_data->sycl_context)); in CeedQFunctionContextSetDataDevice_Sycl() 217 …CeedCallSycl(ceed, impl->d_data_owned = sycl::malloc_device(ctx_size, sycl_data->sycl_device, sycl… in CeedQFunctionContextSetDataDevice_Sycl() 220 sycl::event copy_event = sycl_data->sycl_queue.memcpy(impl->d_data, data, ctx_size, e); in CeedQFunctionContextSetDataDevice_Sycl() 359 CeedCallSycl(ceed, sycl::free(impl->d_data_owned, sycl_data->sycl_context)); in CeedQFunctionContextDestroy_Sycl()
|
| H A D | ceed-sycl-ref-operator.sycl.cpp | 85 CeedCallSycl(ceed, sycl::free(impl->diag->d_eval_mode_in, sycl_data->sycl_context)); in CeedOperatorDestroy_Sycl() 86 CeedCallSycl(ceed, sycl::free(impl->diag->d_eval_mode_out, sycl_data->sycl_context)); in CeedOperatorDestroy_Sycl() 87 CeedCallSycl(ceed, sycl::free(impl->diag->d_identity, sycl_data->sycl_context)); in CeedOperatorDestroy_Sycl() 88 CeedCallSycl(ceed, sycl::free(impl->diag->d_interp_in, sycl_data->sycl_context)); in CeedOperatorDestroy_Sycl() 89 CeedCallSycl(ceed, sycl::free(impl->diag->d_interp_out, sycl_data->sycl_context)); in CeedOperatorDestroy_Sycl() 90 CeedCallSycl(ceed, sycl::free(impl->diag->d_grad_in, sycl_data->sycl_context)); in CeedOperatorDestroy_Sycl() 91 CeedCallSycl(ceed, sycl::free(impl->diag->d_grad_out, sycl_data->sycl_context)); in CeedOperatorDestroy_Sycl() 104 CeedCallSycl(ceed, sycl::free(impl->asmb->d_B_in, sycl_data->sycl_context)); in CeedOperatorDestroy_Sycl() 105 CeedCallSycl(ceed, sycl::free(impl->asmb->d_B_out, sycl_data->sycl_context)); in CeedOperatorDestroy_Sycl() 769 std::vector<sycl::event> e; in CeedOperatorAssembleDiagonalSetup_Sycl() [all …]
|
| H A D | ceed-sycl-ref-qfunction.sycl.cpp | 62 std::vector<sycl::event> e; in CeedQFunctionApply_Sycl() 67 ceed_Sycl->sycl_queue.submit([&](sycl::handler &cgh) { in CeedQFunctionApply_Sycl() 87 sycl::range<1> rounded_Q = ((Q + (wg_size - 1)) / wg_size) * wg_size; in CeedQFunctionApply_Sycl() 88 sycl::nd_range<1> kernel_range(rounded_Q, wg_size); in CeedQFunctionApply_Sycl()
|
| H A D | ceed-sycl-ref.hpp | 73 sycl::kernel *QFunction;
|
| /libCEED/backends/sycl-shared/ |
| H A D | ceed-sycl-shared-basis.sycl.cpp | 60 sycl::range<3> local_range(lrange[2], lrange[1], lrange[0]); in CeedBasisApplyTensor_Sycl_shared() 61 sycl::range<3> global_range(group_count * lrange[2], lrange[1], lrange[0]); in CeedBasisApplyTensor_Sycl_shared() 62 sycl::nd_range<3> kernel_range(global_range, local_range); in CeedBasisApplyTensor_Sycl_shared() 64 …sycl::kernel *interp_kernel = (t_mode == CEED_TRANSPOSE) ? impl->interp_transpose_kernel : impl->i… in CeedBasisApplyTensor_Sycl_shared() 66 std::vector<sycl::event> e; in CeedBasisApplyTensor_Sycl_shared() 69 ceed_Sycl->sycl_queue.submit([&](sycl::handler &cgh) { in CeedBasisApplyTensor_Sycl_shared() 81 sycl::range<3> local_range(lrange[2], lrange[1], lrange[0]); in CeedBasisApplyTensor_Sycl_shared() 82 sycl::range<3> global_range(group_count * lrange[2], lrange[1], lrange[0]); in CeedBasisApplyTensor_Sycl_shared() 83 sycl::nd_range<3> kernel_range(global_range, local_range); in CeedBasisApplyTensor_Sycl_shared() 85 …sycl::kernel *grad_kernel = (t_mode == CEED_TRANSPOSE) ? impl->grad_transpose_kernel : impl->g… in CeedBasisApplyTensor_Sycl_shared() [all …]
|
| H A D | ceed-sycl-shared.hpp | 22 sycl::kernel *interp_kernel; 23 sycl::kernel *interp_transpose_kernel; 24 sycl::kernel *grad_kernel; 25 sycl::kernel *grad_transpose_kernel; 26 sycl::kernel *weight_kernel;
|
| /libCEED/backends/sycl-gen/ |
| H A D | ceed-sycl-gen-operator.sycl.cpp | 140 sycl::range<3> local_range(block_sizes[2], block_sizes[1], block_sizes[0]); in CeedOperatorApplyAdd_Sycl_gen() 141 sycl::range<3> global_range(grid * block_sizes[2], block_sizes[1], block_sizes[0]); in CeedOperatorApplyAdd_Sycl_gen() 142 sycl::nd_range<3> kernel_range(global_range, local_range); in CeedOperatorApplyAdd_Sycl_gen() 145 std::vector<sycl::event> e; in CeedOperatorApplyAdd_Sycl_gen() 149 CeedCallSycl(ceed, ceed_Sycl->sycl_queue.submit([&](sycl::handler &cgh) { in CeedOperatorApplyAdd_Sycl_gen() 220 …impl->indices = sycl::malloc_device<FieldsInt_Sycl>(1, sycl_data->sycl_device, sycl_data->sycl_con… in CeedOperatorCreate_Sycl_gen() 221 impl->fields = sycl::malloc_host<Fields_Sycl>(1, sycl_data->sycl_context); in CeedOperatorCreate_Sycl_gen() 222 …impl->B = sycl::malloc_device<Fields_Sycl>(1, sycl_data->sycl_device, sycl_data->sycl_contex… in CeedOperatorCreate_Sycl_gen() 223 …impl->G = sycl::malloc_device<Fields_Sycl>(1, sycl_data->sycl_device, sycl_data->sycl_contex… in CeedOperatorCreate_Sycl_gen() 224 …impl->W = sycl::malloc_device<CeedScalar>(1, sycl_data->sycl_device, sycl_data->sycl_context… in CeedOperatorCreate_Sycl_gen()
|
| H A D | ceed-sycl-gen.hpp | 21 sycl::kernel *op;
|
| H A D | ceed-sycl-gen-qfunction.sycl.cpp | 37 CeedCallSycl(ceed, sycl::free(impl->d_c, data->sycl_context)); in CeedQFunctionDestroy_Sycl_gen()
|
| H A D | ceed-sycl-gen-operator-build.sycl.cpp | 227 …std::vector allowed_sg_sizes = sycl_data->sycl_device.get_info<sycl::info::device::sub_group_size… in CeedOperatorBuildKernel_Sycl_gen() 758 std::vector<sycl::event> e; in CeedOperatorBuildKernel_Sycl_gen() 762 sycl::event copy_B = sycl_data->sycl_queue.copy<Fields_Sycl>(&h_B, impl->B, 1, e); in CeedOperatorBuildKernel_Sycl_gen() 763 sycl::event copy_G = sycl_data->sycl_queue.copy<Fields_Sycl>(&h_G, impl->G, 1, e); in CeedOperatorBuildKernel_Sycl_gen() 764 …sycl::event copy_indices = sycl_data->sycl_queue.copy<FieldsInt_Sycl>(&h_indices, impl->indices, 1… in CeedOperatorBuildKernel_Sycl_gen() 766 CeedCallSycl(ceed, sycl::event::wait_and_throw({copy_B, copy_G, copy_indices})); in CeedOperatorBuildKernel_Sycl_gen()
|
| /libCEED/ |
| H A D | Makefile | 135 SYCL_FLAG.oneAPI := -fsycl -fno-sycl-id-queries-fit-in-int 324 sycl-core.cpp := $(sort $(wildcard backends/sycl/*.sycl.cpp)) 325 sycl-ref.cpp := $(sort $(wildcard backends/sycl-ref/*.sycl.cpp)) 326 sycl-shared.cpp:= $(sort $(wildcard backends/sycl-shared/*.sycl.cpp)) 327 sycl-gen.cpp := $(sort $(wildcard backends/sycl-gen/*.sycl.cpp)) 576 SYCL_BACKENDS = /gpu/sycl/ref /gpu/sycl/shared /gpu/sycl/gen 584 libceed.sycl += $(sycl-core.cpp) $(sycl-ref.cpp) $(sycl-shared.cpp) $(sycl-gen.cpp) 659 …u:%.cu=$(OBJDIR)/%.o) $(libceed.hip:%.hip.cpp=$(OBJDIR)/%.o) $(libceed.sycl:%.sycl.cpp=$(OBJDIR)/%… 680 $(OBJDIR)/%.o : $(CURDIR)/%.sycl.cpp | $$(@D)/.DIR 683 $(OBJDIR)/%.o : $(CURDIR)/%.sycl.cpp | $$(@D)/.DIR [all …]
|
| H A D | README.md | 66 To enable SYCL support, add `SYCL_DIR=/opt/sycl` or an appropriate directory. 177 | `/gpu/sycl/ref` | Reference pure SYCL kernels | Yes … 178 | `/gpu/sycl/shared` | Optimized pure SYCL kernels using shared memory | Yes … 211 The `/gpu/sycl/*` backends provide GPU performance strictly using SYCL.
|
| H A D | coverage.info | 528 SF:/home/jeremy/Dev/libCEED/backends/ceed-backend-list-sycl.h
|
| /libCEED/rust/libceed-sys/c-src/ |
| H A D | Makefile | 135 SYCL_FLAG.oneAPI := -fsycl -fno-sycl-id-queries-fit-in-int 324 sycl-core.cpp := $(sort $(wildcard backends/sycl/*.sycl.cpp)) 325 sycl-ref.cpp := $(sort $(wildcard backends/sycl-ref/*.sycl.cpp)) 326 sycl-shared.cpp:= $(sort $(wildcard backends/sycl-shared/*.sycl.cpp)) 327 sycl-gen.cpp := $(sort $(wildcard backends/sycl-gen/*.sycl.cpp)) 576 SYCL_BACKENDS = /gpu/sycl/ref /gpu/sycl/shared /gpu/sycl/gen 584 libceed.sycl += $(sycl-core.cpp) $(sycl-ref.cpp) $(sycl-shared.cpp) $(sycl-gen.cpp) 659 …u:%.cu=$(OBJDIR)/%.o) $(libceed.hip:%.hip.cpp=$(OBJDIR)/%.o) $(libceed.sycl:%.sycl.cpp=$(OBJDIR)/%… 680 $(OBJDIR)/%.o : $(CURDIR)/%.sycl.cpp | $$(@D)/.DIR 683 $(OBJDIR)/%.o : $(CURDIR)/%.sycl.cpp | $$(@D)/.DIR [all …]
|
| /libCEED/doc/sphinx/source/ |
| H A D | gpu.md | 16 …ed by C99 and all targeted backends (i.e. CUDA for `/gpu/cuda`, OpenCL/SYCL for `/gpu/sycl`, etc.).
|
| H A D | releasenotes.md | 57 - Added Sycl backends `/gpu/sycl/ref`, `/gpu/sycl/shared`, and `/gpu/sycl/gen`.
|