| /libCEED/include/ceed/jit-source/cuda/ |
| H A D | cuda-shared-basis-read-write-templates.h | 45 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided1d() argument 51 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided1d() 58 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in SumElementStrided1d() argument 64 d_v[ind + comp * strides_comp] += r_v[comp]; in SumElementStrided1d() 94 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided2d() argument 100 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided2d() 107 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in SumElementStrided2d() argument 113 d_v[ind + comp * strides_comp] += r_v[comp]; in SumElementStrided2d() 145 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided3d() argument 152 d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D]; in WriteElementStrided3d() [all …]
|
| H A D | cuda-gen-templates.h | 60 …gle(SharedData_Cuda &data, const CeedInt n, const CeedScalar value, CeedScalar *__restrict__ r_v) { in SetEVecStandard1d_Single() argument 65 r_v[target_comp] = value; in SetEVecStandard1d_Single() 102 … const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d() argument 107 …for (CeedInt comp = 0; comp < NUM_COMP; comp++) atomicAdd(&d_v[ind + COMP_STRIDE * comp], r_v[comp… in WriteLVecStandard1d() 113 … const CeedInt *__restrict__ indices, const CeedScalar *__restrict__ r_v, in WriteLVecStandard1d_Single() argument 121 atomicAdd(&d_v[ind + COMP_STRIDE * target_comp], r_v[target_comp]); in WriteLVecStandard1d_Single() 130 … const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d_Assembly() argument 139 …* e_vec_size + (in_comp * NUM_COMP + comp) * P_1D * P_1D + out_node * P_1D + in_node] += r_v[comp]; in WriteLVecStandard1d_Assembly() 149 … const CeedInt output_offset, const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d_QFAssembly() argument 154 … d_v[ind + (input_offset * NUM_COMP_OUT + output_offset + comp) * (Q_1D * num_elem)] = r_v[comp]; in WriteLVecStandard1d_QFAssembly() [all …]
|
| H A D | cuda-ref-qfunction.h | 26 inline __device__ void writeQuads(const CeedInt quad, const CeedInt num_qpts, const CeedScalar *r_v… in writeQuads() argument 28 d_v[quad + num_qpts * comp] = r_v[comp]; in writeQuads()
|
| /libCEED/include/ceed/jit-source/hip/ |
| H A D | hip-shared-basis-read-write-templates.h | 45 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided1d() argument 51 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided1d() 58 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in SumElementStrided1d() argument 64 d_v[ind + comp * strides_comp] += r_v[comp]; in SumElementStrided1d() 94 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided2d() argument 100 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided2d() 107 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in SumElementStrided2d() argument 113 d_v[ind + comp * strides_comp] += r_v[comp]; in SumElementStrided2d() 145 … const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { in WriteElementStrided3d() argument 152 d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D]; in WriteElementStrided3d() [all …]
|
| H A D | hip-gen-templates.h | 60 …ngle(SharedData_Hip &data, const CeedInt n, const CeedScalar value, CeedScalar *__restrict__ r_v) { in SetEVecStandard1d_Single() argument 65 r_v[target_comp] = value; in SetEVecStandard1d_Single() 101 … const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d() argument 106 …for (CeedInt comp = 0; comp < NUM_COMP; comp++) atomicAdd(&d_v[ind + COMP_STRIDE * comp], r_v[comp… in WriteLVecStandard1d() 112 … const CeedInt *__restrict__ indices, const CeedScalar *__restrict__ r_v, in WriteLVecStandard1d_Single() argument 120 atomicAdd(&d_v[ind + COMP_STRIDE * target_comp], r_v[target_comp]); in WriteLVecStandard1d_Single() 129 … const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d_Assembly() argument 138 …* e_vec_size + (in_comp * NUM_COMP + comp) * P_1D * P_1D + out_node * P_1D + in_node] += r_v[comp]; in WriteLVecStandard1d_Assembly() 148 … const CeedInt output_offset, const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) { in WriteLVecStandard1d_QFAssembly() argument 153 … d_v[ind + (input_offset * NUM_COMP_OUT + output_offset + comp) * (Q_1D * num_elem)] = r_v[comp]; in WriteLVecStandard1d_QFAssembly() [all …]
|
| H A D | hip-ref-qfunction.h | 26 inline __device__ void writeQuads(const CeedInt quad, const CeedInt num_qpts, const CeedScalar *r_v… in writeQuads() argument 28 d_v[quad + num_qpts * comp] = r_v[comp]; in writeQuads()
|
| /libCEED/include/ceed/jit-source/sycl/ |
| H A D | sycl-shared-basis-read-write-templates.h | 47 … const CeedInt strides_comp, const CeedInt strides_elem, private const CeedScalar *restrict r_v, in WriteElementStrided1d() argument 56 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided1d() 88 … const CeedInt strides_comp, const CeedInt strides_elem, private const CeedScalar *restrict r_v, in WriteElementStrided2d() argument 98 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided2d() 132 … const CeedInt strides_comp, const CeedInt strides_elem, private const CeedScalar *restrict r_v, in WriteElementStrided3d() argument 143 d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D]; in WriteElementStrided3d()
|
| H A D | sycl-gen-templates.h | 69 …const global CeedInt *restrict indices, const private CeedScalar *restrict r_v, global CeedAtomicS… in writeDofsOffset1d() argument 77 …atomic_fetch_add_explicit(&d_v[ind + strides_comp * comp], r_v[comp], memory_order_relaxed, memory… in writeDofsOffset1d() 85 … const CeedInt strides_elem, const CeedInt num_elem, private const CeedScalar *restrict r_v, in writeDofsStrided1d() argument 94 d_v[ind + comp * strides_comp] = r_v[comp]; in writeDofsStrided1d() 140 …const global CeedInt *restrict indices, const private CeedScalar *restrict r_v, global CeedAtomicS… in writeDofsOffset2d() argument 149 …atomic_fetch_add_explicit(&d_v[ind + strides_comp * comp], r_v[comp], memory_order_relaxed, memory… in writeDofsOffset2d() 157 … const CeedInt strides_elem, const CeedInt num_elem, const private CeedScalar *restrict r_v, in writeDofsStrided2d() argument 166 for (CeedInt comp = 0; comp < num_comp; ++comp) d_v[ind + comp * strides_comp] += r_v[comp]; in writeDofsStrided2d() 248 …const global CeedInt *restrict indices, const private CeedScalar *restrict r_v, global CeedAtomicS… in writeDofsOffset3d() argument 258 …atomic_fetch_add_explicit(&d_v[ind + strides_comp * comp], r_v[z + comp * P_1D], memory_order_rela… in writeDofsOffset3d() [all …]
|