| /libCEED/include/ceed/jit-source/cuda/ |
| H A D | cuda-shared-basis-read-write-templates.h | 45 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided1d() argument 51 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided1d() 58 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in SumElementStrided1d() argument 64 d_v[ind + comp * strides_comp] += r_v[comp]; in SumElementStrided1d() 94 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided2d() argument 100 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided2d() 107 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in SumElementStrided2d() argument 113 d_v[ind + comp * strides_comp] += r_v[comp]; in SumElementStrided2d() 145 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided3d() argument 152 d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D]; in WriteElementStrided3d() [all …]
|
| H A D | cuda-gen-templates.h | 102 … const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d() argument 107 …for (CeedInt comp = 0; comp < NUM_COMP; comp++) atomicAdd(&d_v[ind + COMP_STRIDE * comp], r_v[comp… in WriteLVecStandard1d() 114 CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d_Single() argument 121 atomicAdd(&d_v[ind + COMP_STRIDE * target_comp], r_v[target_comp]); in WriteLVecStandard1d_Single() 130 … const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d_Assembly() argument 139 …d_v[elem * e_vec_size * e_vec_size + (in_comp * NUM_COMP + comp) * P_1D * P_1D + out_node * P_1D +… in WriteLVecStandard1d_Assembly() 149 … const CeedInt output_offset, const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d_QFAssembly() argument 154 … d_v[ind + (input_offset * NUM_COMP_OUT + output_offset + comp) * (Q_1D * num_elem)] = r_v[comp]; in WriteLVecStandard1d_QFAssembly() 164 CeedScalar *__restrict__ d_v) { in WriteLVecStrided1d() argument 169 for (CeedInt comp = 0; comp < NUM_COMP; comp++) d_v[ind + comp * STRIDES_COMP] += r_v[comp]; in WriteLVecStrided1d() [all …]
|
| H A D | cuda-ref-qfunction.h | 26 …id writeQuads(const CeedInt quad, const CeedInt num_qpts, const CeedScalar *r_v, CeedScalar *d_v) { in writeQuads() argument 28 d_v[quad + num_qpts * comp] = r_v[comp]; in writeQuads()
|
| /libCEED/include/ceed/jit-source/hip/ |
| H A D | hip-shared-basis-read-write-templates.h | 45 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided1d() argument 51 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided1d() 58 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in SumElementStrided1d() argument 64 d_v[ind + comp * strides_comp] += r_v[comp]; in SumElementStrided1d() 94 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided2d() argument 100 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided2d() 107 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in SumElementStrided2d() argument 113 d_v[ind + comp * strides_comp] += r_v[comp]; in SumElementStrided2d() 145 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided3d() argument 152 d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D]; in WriteElementStrided3d() [all …]
|
| H A D | hip-gen-templates.h | 101 … const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d() argument 106 …for (CeedInt comp = 0; comp < NUM_COMP; comp++) atomicAdd(&d_v[ind + COMP_STRIDE * comp], r_v[comp… in WriteLVecStandard1d() 113 CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d_Single() argument 120 atomicAdd(&d_v[ind + COMP_STRIDE * target_comp], r_v[target_comp]); in WriteLVecStandard1d_Single() 129 … const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d_Assembly() argument 138 …d_v[elem * e_vec_size * e_vec_size + (in_comp * NUM_COMP + comp) * P_1D * P_1D + out_node * P_1D +… in WriteLVecStandard1d_Assembly() 148 … const CeedInt output_offset, const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d_QFAssembly() argument 153 … d_v[ind + (input_offset * NUM_COMP_OUT + output_offset + comp) * (Q_1D * num_elem)] = r_v[comp]; in WriteLVecStandard1d_QFAssembly() 163 CeedScalar *__restrict__ d_v) { in WriteLVecStrided1d() argument 168 for (CeedInt comp = 0; comp < NUM_COMP; comp++) d_v[ind + comp * STRIDES_COMP] += r_v[comp]; in WriteLVecStrided1d() [all …]
|
| H A D | hip-ref-qfunction.h | 26 …id writeQuads(const CeedInt quad, const CeedInt num_qpts, const CeedScalar *r_v, CeedScalar *d_v) { in writeQuads() argument 28 d_v[quad + num_qpts * comp] = r_v[comp]; in writeQuads()
|
| /libCEED/backends/cuda-ref/ |
| H A D | ceed-cuda-ref-restriction.c | 121 CeedScalar *d_v; in CeedElemRestrictionApply_Cuda_Core() local 137 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedElemRestrictionApply_Cuda_Core() 140 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedElemRestrictionApply_Cuda_Core() 154 void *args[] = {&d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 160 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 166 void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 170 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 177 void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 181 void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 185 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() [all …]
|
| H A D | ceed-cuda-ref-basis.c | 29 CeedScalar *d_v; in CeedBasisApplyCore_Cuda() local 39 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyCore_Cuda() 43 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyCore_Cuda() 51 … *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v}; in CeedBasisApplyCore_Cuda() 57 …s[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v}; in CeedBasisApplyCore_Cuda() 64 void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; in CeedBasisApplyCore_Cuda() 80 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); in CeedBasisApplyCore_Cuda() 109 CeedScalar *d_v; in CeedBasisApplyAtPointsCore_Cuda() local 199 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyAtPointsCore_Cuda() 203 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyAtPointsCore_Cuda() [all …]
|
| /libCEED/backends/hip-ref/ |
| H A D | ceed-hip-ref-restriction.c | 122 CeedScalar *d_v; in CeedElemRestrictionApply_Hip_Core() local 138 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedElemRestrictionApply_Hip_Core() 141 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedElemRestrictionApply_Hip_Core() 155 void *args[] = {&d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 161 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 167 void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 171 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 178 void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 182 void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 186 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() [all …]
|
| H A D | ceed-hip-ref-basis.c | 28 CeedScalar *d_v; in CeedBasisApplyCore_Hip() local 38 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyCore_Hip() 42 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyCore_Hip() 51 … *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v}; in CeedBasisApplyCore_Hip() 57 …s[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v}; in CeedBasisApplyCore_Hip() 64 void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; in CeedBasisApplyCore_Hip() 80 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); in CeedBasisApplyCore_Hip() 108 CeedScalar *d_v; in CeedBasisApplyAtPointsCore_Hip() local 198 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyAtPointsCore_Hip() 202 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyAtPointsCore_Hip() [all …]
|
| /libCEED/include/ceed/jit-source/sycl/ |
| H A D | sycl-shared-basis-read-write-templates.h | 48 global CeedScalar *restrict d_v) { in WriteElementStrided1d() argument 56 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided1d() 89 global CeedScalar *restrict d_v) { in WriteElementStrided2d() argument 98 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided2d() 133 global CeedScalar *restrict d_v) { in WriteElementStrided3d() argument 143 d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D]; in WriteElementStrided3d()
|
| H A D | sycl-gen-templates.h | 69 …*restrict indices, const private CeedScalar *restrict r_v, global CeedAtomicScalar *restrict d_v) { in writeDofsOffset1d() argument 77 …atomic_fetch_add_explicit(&d_v[ind + strides_comp * comp], r_v[comp], memory_order_relaxed, memory… in writeDofsOffset1d() 86 global CeedScalar *restrict d_v) { in writeDofsStrided1d() argument 94 d_v[ind + comp * strides_comp] = r_v[comp]; in writeDofsStrided1d() 140 …*restrict indices, const private CeedScalar *restrict r_v, global CeedAtomicScalar *restrict d_v) { in writeDofsOffset2d() argument 149 …atomic_fetch_add_explicit(&d_v[ind + strides_comp * comp], r_v[comp], memory_order_relaxed, memory… in writeDofsOffset2d() 158 global CeedScalar *restrict d_v) { in writeDofsStrided2d() argument 166 for (CeedInt comp = 0; comp < num_comp; ++comp) d_v[ind + comp * strides_comp] += r_v[comp]; in writeDofsStrided2d() 248 …*restrict indices, const private CeedScalar *restrict r_v, global CeedAtomicScalar *restrict d_v) { in writeDofsOffset3d() argument 258 …atomic_fetch_add_explicit(&d_v[ind + strides_comp * comp], r_v[z + comp * P_1D], memory_order_rela… in writeDofsOffset3d() [all …]
|
| /libCEED/backends/hip-shared/ |
| H A D | ceed-hip-shared-basis.c | 97 CeedScalar *d_v; in CeedBasisApplyTensorCore_Hip_shared() local 110 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyTensorCore_Hip_shared() 112 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyTensorCore_Hip_shared() 125 void *interp_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_u, &d_v}; in CeedBasisApplyTensorCore_Hip_shared() 177 void *grad_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_grad_1d, &d_u, &d_v}; in CeedBasisApplyTensorCore_Hip_shared() 222 void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; in CeedBasisApplyTensorCore_Hip_shared() 254 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); in CeedBasisApplyTensorCore_Hip_shared() 282 CeedScalar *d_v; in CeedBasisApplyAtPointsCore_Hip_shared() local 374 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyAtPointsCore_Hip_shared() 376 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyAtPointsCore_Hip_shared() [all …]
|
| /libCEED/backends/cuda-shared/ |
| H A D | ceed-cuda-shared-basis.c | 30 CeedScalar *d_v; in CeedBasisApplyTensorCore_Cuda_shared() local 43 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyTensorCore_Cuda_shared() 45 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyTensorCore_Cuda_shared() 58 void *interp_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_u, &d_v}; in CeedBasisApplyTensorCore_Cuda_shared() 112 void *grad_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_grad_1d, &d_u, &d_v}; in CeedBasisApplyTensorCore_Cuda_shared() 158 void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; in CeedBasisApplyTensorCore_Cuda_shared() 188 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); in CeedBasisApplyTensorCore_Cuda_shared() 217 CeedScalar *d_v; in CeedBasisApplyAtPointsCore_Cuda_shared() local 308 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyAtPointsCore_Cuda_shared() 310 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyAtPointsCore_Cuda_shared() [all …]
|
| /libCEED/backends/sycl-ref/ |
| H A D | ceed-sycl-ref-basis.sycl.cpp | 275 CeedScalar *d_v; in CeedBasisApply_Sycl() local 286 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApply_Sycl() 295 data->sycl_queue.fill<CeedScalar>(d_v, 0, length, e); in CeedBasisApply_Sycl() 302 …d(CeedBasisApplyInterp_Sycl<true>(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); in CeedBasisApply_Sycl() 304 …(CeedBasisApplyInterp_Sycl<false>(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); in CeedBasisApply_Sycl() 309 …end(CeedBasisApplyGrad_Sycl<true>(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); in CeedBasisApply_Sycl() 311 …nd(CeedBasisApplyGrad_Sycl<false>(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); in CeedBasisApply_Sycl() 316 CeedCallBackend(CeedBasisApplyWeight_Sycl(data->sycl_queue, num_elem, impl, d_v)); in CeedBasisApply_Sycl() 328 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); in CeedBasisApply_Sycl() 461 CeedScalar *d_v; in CeedBasisApplyNonTensor_Sycl() local [all …]
|
| H A D | ceed-sycl-restriction.sycl.cpp | 153 CeedScalar *d_v; in CeedElemRestrictionApply_Sycl() local 164 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedElemRestrictionApply_Sycl() 167 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedElemRestrictionApply_Sycl() 175 CeedCallBackend(CeedElemRestrictionOffsetNoTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl() 178 CeedCallBackend(CeedElemRestrictionStridedNoTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl() 184 CeedCallBackend(CeedElemRestrictionOffsetTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl() 187 CeedCallBackend(CeedElemRestrictionStridedTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl() 197 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); in CeedElemRestrictionApply_Sycl()
|
| /libCEED/backends/sycl-shared/ |
| H A D | ceed-sycl-shared-basis.sycl.cpp | 41 CeedScalar *d_v; in CeedBasisApplyTensor_Sycl_shared() local 51 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyTensor_Sycl_shared() 71 cgh.set_args(num_elem, impl->d_interp_1d, d_u, d_v); in CeedBasisApplyTensor_Sycl_shared() 94 cgh.set_args(num_elem, impl->d_interp_1d, d_grad_1d, d_u, d_v); in CeedBasisApplyTensor_Sycl_shared() 114 cgh.set_args(num_elem, impl->d_q_weight_1d, d_v); in CeedBasisApplyTensor_Sycl_shared() 128 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); in CeedBasisApplyTensor_Sycl_shared()
|
| /libCEED/backends/magma/ |
| H A D | ceed-magma-basis.c | 35 CeedScalar *d_v; in CeedBasisApplyCore_Magma() local 56 if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyCore_Magma() 57 else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyCore_Magma() 117 …void *args[] = {&impl->d_interp_1d, &d_u, &u_elem_stride, &u_comp_stride, &d_v, &v_elem_stride, … in CeedBasisApplyCore_Magma() 194 …>d_interp_1d, &impl->d_grad_1d, &d_u, &u_elem_stride, &u_comp_stride, &u_dim_stride, &d_v, in CeedBasisApplyCore_Magma() 231 void *args[] = {&impl->d_q_weight_1d, &d_v, &elem_dofs_size, &num_elem}; in CeedBasisApplyCore_Magma() 251 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); in CeedBasisApplyCore_Magma() 283 CeedScalar *d_v; in CeedBasisApplyNonTensorCore_Magma() local 299 if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyNonTensorCore_Magma() 300 else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); in CeedBasisApplyNonTensorCore_Magma() [all …]
|