Lines Matching refs:CeedInt
20 inline void loadMatrix(const CeedInt N, const CeedScalar *restrict d_B, CeedScalar *restrict B) { in loadMatrix()
21 const CeedInt item_id = get_local_linear_id(); in loadMatrix()
22 const CeedInt group_size = get_local_size(0) * get_local_size(1) * get_local_size(2); in loadMatrix()
23 for (CeedInt i = item_id; i < N; i += group_size) B[i] = d_B[i]; in loadMatrix()
33 inline void readDofsOffset1d(const CeedInt num_comp, const CeedInt strides_comp, const CeedInt P_1D… in readDofsOffset1d()
34 …const global CeedInt *restrict indices, const global CeedScalar *restrict d_u, private CeedScalar … in readDofsOffset1d()
35 const CeedInt item_id_x = get_local_id(0); in readDofsOffset1d()
36 const CeedInt elem = get_global_id(2); in readDofsOffset1d()
39 const CeedInt node = item_id_x; in readDofsOffset1d()
40 const CeedInt ind = indices[node + elem * P_1D]; in readDofsOffset1d()
41 for (CeedInt comp = 0; comp < num_comp; ++comp) { in readDofsOffset1d()
50 inline void readDofsStrided1d(const CeedInt num_comp, const CeedInt P_1D, const CeedInt strides_nod… in readDofsStrided1d()
51 … const CeedInt strides_elem, const CeedInt num_elem, global const CeedScalar *restrict d_u, in readDofsStrided1d()
53 const CeedInt item_id_x = get_local_id(0); in readDofsStrided1d()
54 const CeedInt elem = get_global_id(2); in readDofsStrided1d()
57 const CeedInt node = item_id_x; in readDofsStrided1d()
58 const CeedInt ind = node * strides_node + elem * strides_elem; in readDofsStrided1d()
59 for (CeedInt comp = 0; comp < num_comp; comp++) { in readDofsStrided1d()
68 inline void writeDofsOffset1d(const CeedInt num_comp, const CeedInt strides_comp, const CeedInt P_1… in writeDofsOffset1d()
69 …const global CeedInt *restrict indices, const private CeedScalar *restrict r_v, global CeedAtomicS… in writeDofsOffset1d()
70 const CeedInt item_id_x = get_local_id(0); in writeDofsOffset1d()
71 const CeedInt elem = get_global_id(2); in writeDofsOffset1d()
74 const CeedInt node = item_id_x; in writeDofsOffset1d()
75 const CeedInt ind = indices[node + elem * P_1D]; in writeDofsOffset1d()
76 for (CeedInt comp = 0; comp < num_comp; ++comp) in writeDofsOffset1d()
84 inline void writeDofsStrided1d(const CeedInt num_comp, const CeedInt P_1D, const CeedInt strides_no… in writeDofsStrided1d()
85 … const CeedInt strides_elem, const CeedInt num_elem, private const CeedScalar *restrict r_v, in writeDofsStrided1d()
87 const CeedInt item_id_x = get_local_id(0); in writeDofsStrided1d()
88 const CeedInt elem = get_global_id(2); in writeDofsStrided1d()
91 const CeedInt node = item_id_x; in writeDofsStrided1d()
92 const CeedInt ind = node * strides_node + elem * strides_elem; in writeDofsStrided1d()
93 for (CeedInt comp = 0; comp < num_comp; comp++) { in writeDofsStrided1d()
106 inline void readDofsOffset2d(const CeedInt num_comp, const CeedInt strides_comp, const CeedInt P_1D… in readDofsOffset2d()
107 …const global CeedInt *restrict indices, const global CeedScalar *restrict d_u, private CeedScalar … in readDofsOffset2d()
108 const CeedInt item_id_x = get_local_id(0); in readDofsOffset2d()
109 const CeedInt item_id_y = get_local_id(1); in readDofsOffset2d()
110 const CeedInt elem = get_global_id(2); in readDofsOffset2d()
113 const CeedInt node = item_id_x + item_id_y * P_1D; in readDofsOffset2d()
114 const CeedInt ind = indices[node + elem * P_1D * P_1D]; in readDofsOffset2d()
115 for (CeedInt comp = 0; comp < num_comp; ++comp) r_u[comp] = d_u[ind + strides_comp * comp]; in readDofsOffset2d()
122 inline void readDofsStrided2d(const CeedInt num_comp, const CeedInt P_1D, const CeedInt strides_nod… in readDofsStrided2d()
123 … const CeedInt strides_elem, const CeedInt num_elem, const global CeedScalar *restrict d_u, in readDofsStrided2d()
125 const CeedInt item_id_x = get_local_id(0); in readDofsStrided2d()
126 const CeedInt item_id_y = get_local_id(1); in readDofsStrided2d()
127 const CeedInt elem = get_global_id(2); in readDofsStrided2d()
130 const CeedInt node = item_id_x + item_id_y * P_1D; in readDofsStrided2d()
131 const CeedInt ind = node * strides_node + elem * strides_elem; in readDofsStrided2d()
132 for (CeedInt comp = 0; comp < num_comp; ++comp) r_u[comp] = d_u[ind + comp * strides_comp]; in readDofsStrided2d()
139 inline void writeDofsOffset2d(const CeedInt num_comp, const CeedInt strides_comp, const CeedInt P_1… in writeDofsOffset2d()
140 …const global CeedInt *restrict indices, const private CeedScalar *restrict r_v, global CeedAtomicS… in writeDofsOffset2d()
141 const CeedInt item_id_x = get_local_id(0); in writeDofsOffset2d()
142 const CeedInt item_id_y = get_local_id(1); in writeDofsOffset2d()
143 const CeedInt elem = get_global_id(2); in writeDofsOffset2d()
146 const CeedInt node = item_id_x + item_id_y * P_1D; in writeDofsOffset2d()
147 const CeedInt ind = indices[node + elem * P_1D * P_1D]; in writeDofsOffset2d()
148 for (CeedInt comp = 0; comp < num_comp; ++comp) in writeDofsOffset2d()
156 inline void writeDofsStrided2d(const CeedInt num_comp, const CeedInt P_1D, const CeedInt strides_no… in writeDofsStrided2d()
157 … const CeedInt strides_elem, const CeedInt num_elem, const private CeedScalar *restrict r_v, in writeDofsStrided2d()
159 const CeedInt item_id_x = get_local_id(0); in writeDofsStrided2d()
160 const CeedInt item_id_y = get_local_id(1); in writeDofsStrided2d()
161 const CeedInt elem = get_global_id(2); in writeDofsStrided2d()
164 const CeedInt node = item_id_x + item_id_y * P_1D; in writeDofsStrided2d()
165 const CeedInt ind = node * strides_node + elem * strides_elem; in writeDofsStrided2d()
166 for (CeedInt comp = 0; comp < num_comp; ++comp) d_v[ind + comp * strides_comp] += r_v[comp]; in writeDofsStrided2d()
177 inline void readDofsOffset3d(const CeedInt num_comp, const CeedInt strides_comp, const CeedInt P_1D… in readDofsOffset3d()
178 …const global CeedInt *restrict indices, const global CeedScalar *restrict d_u, private CeedScalar … in readDofsOffset3d()
179 const CeedInt item_id_x = get_local_id(0); in readDofsOffset3d()
180 const CeedInt item_id_y = get_local_id(1); in readDofsOffset3d()
181 const CeedInt elem = get_global_id(2); in readDofsOffset3d()
184 for (CeedInt z = 0; z < P_1D; ++z) { in readDofsOffset3d()
185 const CeedInt node = item_id_x + P_1D * (item_id_y + P_1D * z); in readDofsOffset3d()
186 const CeedInt ind = indices[node + elem * P_1D * P_1D * P_1D]; in readDofsOffset3d()
187 …for (CeedInt comp = 0; comp < num_comp; ++comp) r_u[z + comp * P_1D] = d_u[ind + strides_comp * co… in readDofsOffset3d()
195 inline void readDofsStrided3d(const CeedInt num_comp, const CeedInt P_1D, const CeedInt strides_nod… in readDofsStrided3d()
196 … const CeedInt strides_elem, const CeedInt num_elem, const global CeedScalar *restrict d_u, in readDofsStrided3d()
198 const CeedInt item_id_x = get_local_id(0); in readDofsStrided3d()
199 const CeedInt item_id_y = get_local_id(1); in readDofsStrided3d()
200 const CeedInt elem = get_global_id(2); in readDofsStrided3d()
203 for (CeedInt z = 0; z < P_1D; ++z) { in readDofsStrided3d()
204 const CeedInt node = item_id_x + P_1D * (item_id_y + P_1D * z); in readDofsStrided3d()
205 const CeedInt ind = node * strides_node + elem * strides_elem; in readDofsStrided3d()
206 …for (CeedInt comp = 0; comp < num_comp; ++comp) r_u[z + comp * P_1D] = d_u[ind + comp * strides_co… in readDofsStrided3d()
214 …SliceQuadsOffset3d(const CeedInt num_comp, const CeedInt strides_comp, const CeedInt Q_1D, const C… in readSliceQuadsOffset3d()
215 …const global CeedInt *restrict indices, const global CeedScalar *restrict d_u, private CeedScalar … in readSliceQuadsOffset3d()
216 const CeedInt item_id_x = get_local_id(0); in readSliceQuadsOffset3d()
217 const CeedInt item_id_y = get_local_id(1); in readSliceQuadsOffset3d()
218 const CeedInt elem = get_global_id(2); in readSliceQuadsOffset3d()
221 const CeedInt node = item_id_x + Q_1D * (item_id_y + Q_1D * q); in readSliceQuadsOffset3d()
222 const CeedInt ind = indices[node + elem * Q_1D * Q_1D * Q_1D]; in readSliceQuadsOffset3d()
223 for (CeedInt comp = 0; comp < num_comp; ++comp) r_u[comp] = d_u[ind + strides_comp * comp]; in readSliceQuadsOffset3d()
230 …d readSliceQuadsStrided3d(const CeedInt num_comp, const CeedInt Q_1D, CeedInt strides_node, CeedIn… in readSliceQuadsStrided3d()
231 … const CeedInt num_elem, const CeedInt q, const global CeedScalar *restrict d_u, in readSliceQuadsStrided3d()
233 const CeedInt item_id_x = get_local_id(0); in readSliceQuadsStrided3d()
234 const CeedInt item_id_y = get_local_id(1); in readSliceQuadsStrided3d()
235 const CeedInt elem = get_global_id(2); in readSliceQuadsStrided3d()
238 const CeedInt node = item_id_x + Q_1D * (item_id_y + Q_1D * q); in readSliceQuadsStrided3d()
239 const CeedInt ind = node * strides_node + elem * strides_elem; in readSliceQuadsStrided3d()
240 for (CeedInt comp = 0; comp < num_comp; ++comp) r_u[comp] = d_u[ind + comp * strides_comp]; in readSliceQuadsStrided3d()
247 inline void writeDofsOffset3d(const CeedInt num_comp, const CeedInt strides_comp, const CeedInt P_1… in writeDofsOffset3d()
248 …const global CeedInt *restrict indices, const private CeedScalar *restrict r_v, global CeedAtomicS… in writeDofsOffset3d()
249 const CeedInt item_id_x = get_local_id(0); in writeDofsOffset3d()
250 const CeedInt item_id_y = get_local_id(1); in writeDofsOffset3d()
251 const CeedInt elem = get_global_id(2); in writeDofsOffset3d()
254 for (CeedInt z = 0; z < P_1D; ++z) { in writeDofsOffset3d()
255 const CeedInt node = item_id_x + item_id_y * P_1D + z * P_1D * P_1D; in writeDofsOffset3d()
256 const CeedInt ind = indices[node + elem * P_1D * P_1D * P_1D]; in writeDofsOffset3d()
257 for (CeedInt comp = 0; comp < num_comp; ++comp) in writeDofsOffset3d()
266 inline void writeDofsStrided3d(const CeedInt num_comp, const CeedInt P_1D, const CeedInt strides_no… in writeDofsStrided3d()
267 … const CeedInt strides_elem, const CeedInt num_elem, const private CeedScalar *restrict r_v, in writeDofsStrided3d()
269 const CeedInt item_id_x = get_local_id(0); in writeDofsStrided3d()
270 const CeedInt item_id_y = get_local_id(1); in writeDofsStrided3d()
271 const CeedInt elem = get_global_id(2); in writeDofsStrided3d()
274 for (CeedInt z = 0; z < P_1D; ++z) { in writeDofsStrided3d()
275 const CeedInt node = item_id_x + P_1D * (item_id_y + P_1D * z); in writeDofsStrided3d()
276 const CeedInt ind = node * strides_node + elem * strides_elem; in writeDofsStrided3d()
277 …for (CeedInt comp = 0; comp < num_comp; ++comp) d_v[ind + comp * strides_comp] += r_v[z + comp * P… in writeDofsStrided3d()
285 inline void gradCollo3d(const CeedInt num_comp, const CeedInt Q_1D, const CeedInt q, const private … in gradCollo3d()
287 const CeedInt item_id_x = get_local_id(0); in gradCollo3d()
288 const CeedInt item_id_y = get_local_id(1); in gradCollo3d()
290 for (CeedInt comp = 0; comp < num_comp; ++comp) { in gradCollo3d()
299 for (CeedInt i = 0; i < Q_1D; ++i) in gradCollo3d()
304 for (CeedInt i = 0; i < Q_1D; ++i) in gradCollo3d()
309 …for (CeedInt i = 0; i < Q_1D; ++i) r_V[comp + 2 * num_comp] += s_G[i + q * Q_1D] * r_U[i + comp * … in gradCollo3d()
319 inline void gradColloTranspose3d(const CeedInt num_comp, const CeedInt Q_1D, const CeedInt q, const… in gradColloTranspose3d()
321 const CeedInt item_id_x = get_local_id(0); in gradColloTranspose3d()
322 const CeedInt item_id_y = get_local_id(1); in gradColloTranspose3d()
324 for (CeedInt comp = 0; comp < num_comp; ++comp) { in gradColloTranspose3d()
332 for (CeedInt i = 0; i < Q_1D; ++i) in gradColloTranspose3d()
344 for (CeedInt i = 0; i < Q_1D; ++i) in gradColloTranspose3d()
351 for (CeedInt i = 0; i < Q_1D; ++i) in gradColloTranspose3d()