Lines Matching refs:e

30     for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {  in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core()  local
34 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core()
35 … uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * (CeedSize)num_comp]; in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core()
45 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core() local
49 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core()
50 … uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * (CeedSize)strides[2]]; in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core()
67 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core() local
70 …vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_siz… in CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core()
85 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core() local
88 vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = in CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core()
89 …uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 … in CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core()
104 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core() local
109 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
110 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
111 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
112 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
113 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
117 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
118 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
119 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
120 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
121 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
122 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
123 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
127 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
128 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
129 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
130 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
131 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; in CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core()
147 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core() local
152 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
153 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
154 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
155 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
156 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
160 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
161 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
162 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
163 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
164 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
165 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
166 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
170 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
171 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
172 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
173 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
174 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); in CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core()
192 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyStridedTranspose_Ref_Core() local
195 CeedPragmaSIMD for (CeedSize j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { in CeedElemRestrictionApplyStridedTranspose_Ref_Core()
196 …vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu[e * elem_size * num_comp + (k * elem_… in CeedElemRestrictionApplyStridedTranspose_Ref_Core()
206 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyStridedTranspose_Ref_Core() local
209 CeedPragmaSIMD for (CeedSize j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { in CeedElemRestrictionApplyStridedTranspose_Ref_Core()
210 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += in CeedElemRestrictionApplyStridedTranspose_Ref_Core()
211 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; in CeedElemRestrictionApplyStridedTranspose_Ref_Core()
228 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyOffsetTranspose_Ref_Core() local
232 for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { in CeedElemRestrictionApplyOffsetTranspose_Ref_Core()
235 vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset]; in CeedElemRestrictionApplyOffsetTranspose_Ref_Core()
236 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; in CeedElemRestrictionApplyOffsetTranspose_Ref_Core()
252 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyOrientedTranspose_Ref_Core() local
256 for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { in CeedElemRestrictionApplyOrientedTranspose_Ref_Core()
259 …vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * e… in CeedElemRestrictionApplyOrientedTranspose_Ref_Core()
260 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; in CeedElemRestrictionApplyOrientedTranspose_Ref_Core()
277 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core() local
280 const CeedSize block_end = CeedIntMin(block_size, num_elem - e); in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
284 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
285 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
286 … uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
287 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
290 …CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc… in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
294 … vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
295 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
296 … uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
297 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
298 … uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
299 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
302 …CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc… in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
306 … vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
307 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
308 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
309 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
312 …CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc… in CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core()
329 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core() local
332 const CeedSize block_end = CeedIntMin(block_size, num_elem - e); in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
336 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
337 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
338 … uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
339 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
342 …CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc… in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
346 … vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
347 … abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
348 … uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
349 … abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
350 … uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
351 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
354 …CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc… in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
358 … vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
359 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
360 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
361 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
364 …CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc… in CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core()
379 for (CeedInt e = start; e < stop; e++) { in CeedElemRestrictionApplyAtPointsInElement_Ref_Core() local
380 l_vec_offset = impl->offsets[e]; in CeedElemRestrictionApplyAtPointsInElement_Ref_Core()
381 CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points)); in CeedElemRestrictionApplyAtPointsInElement_Ref_Core()