1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED: http://github.com/ceed
7
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <stdbool.h>
11 #include <stdlib.h>
12 #include <string.h>
13
14 #include "ceed-ref.h"
15
16 //------------------------------------------------------------------------------
17 // Core ElemRestriction Apply Code
18 //------------------------------------------------------------------------------
CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt start,const CeedInt stop,const CeedInt num_elem,const CeedInt elem_size,CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)19 static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
20 const CeedInt start, const CeedInt stop, const CeedInt num_elem,
21 const CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
22 CeedScalar *__restrict__ vv) {
23 // No offsets provided, identity restriction
24 bool has_backend_strides;
25
26 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
27 if (has_backend_strides) {
28 // CPU backend strides are {1, elem_size, elem_size*num_comp}
29 // This if branch is left separate to allow better inlining
30 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
31 for (CeedSize k = 0; k < num_comp; k++) {
32 for (CeedSize n = 0; n < elem_size; n++) {
33 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
34 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
35 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * (CeedSize)num_comp];
36 }
37 }
38 }
39 }
40 } else {
41 // User provided strides
42 CeedInt strides[3];
43
44 CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides));
45 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
46 for (CeedSize k = 0; k < num_comp; k++) {
47 for (CeedSize n = 0; n < elem_size; n++) {
48 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
49 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
50 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * (CeedSize)strides[2]];
51 }
52 }
53 }
54 }
55 }
56 return CEED_ERROR_SUCCESS;
57 }
58
CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,const CeedInt start,const CeedInt stop,const CeedInt num_elem,const CeedInt elem_size,const CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)59 static inline int CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
60 const CeedInt comp_stride, const CeedInt start, const CeedInt stop,
61 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset,
62 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
63 // Default restriction with offsets
64 CeedElemRestriction_Ref *impl;
65
66 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
67 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
68 for (CeedSize k = 0; k < num_comp; k++) {
69 CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) {
70 vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
71 }
72 }
73 }
74 return CEED_ERROR_SUCCESS;
75 }
76
CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,const CeedInt start,const CeedInt stop,const CeedInt num_elem,const CeedInt elem_size,const CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)77 static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
78 const CeedInt comp_stride, const CeedInt start, const CeedInt stop,
79 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset,
80 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
81 // Restriction with orientations
82 CeedElemRestriction_Ref *impl;
83
84 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
85 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
86 for (CeedSize k = 0; k < num_comp; k++) {
87 CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) {
88 vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] =
89 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0);
90 }
91 }
92 }
93 return CEED_ERROR_SUCCESS;
94 }
95
CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,const CeedInt start,const CeedInt stop,const CeedInt num_elem,const CeedInt elem_size,const CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)96 static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
97 const CeedInt comp_stride, const CeedInt start, const CeedInt stop,
98 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset,
99 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
100 // Restriction with tridiagonal transformation
101 CeedElemRestriction_Ref *impl;
102
103 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
104 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
105 for (CeedSize k = 0; k < num_comp; k++) {
106 CeedSize n = 0;
107
108 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
109 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
110 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
111 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
112 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
113 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
114 }
115 for (n = 1; n < elem_size - 1; n++) {
116 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
117 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
118 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
119 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
120 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
121 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
122 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
123 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
124 }
125 }
126 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
127 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
128 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
129 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
130 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
131 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
132 }
133 }
134 }
135 return CEED_ERROR_SUCCESS;
136 }
137
CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,const CeedInt start,const CeedInt stop,const CeedInt num_elem,const CeedInt elem_size,const CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)138 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp,
139 const CeedInt block_size, const CeedInt comp_stride,
140 const CeedInt start, const CeedInt stop, const CeedInt num_elem,
141 const CeedInt elem_size, const CeedSize v_offset,
142 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
143 // Restriction with (unsigned) tridiagonal transformation
144 CeedElemRestriction_Ref *impl;
145
146 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
147 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
148 for (CeedSize k = 0; k < num_comp; k++) {
149 CeedSize n = 0;
150
151 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
152 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
153 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
154 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
155 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
156 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
157 }
158 for (n = 1; n < elem_size - 1; n++) {
159 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
160 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
161 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
162 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
163 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
164 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
165 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
166 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
167 }
168 }
169 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
170 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
171 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
172 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
173 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
174 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
175 }
176 }
177 }
178 return CEED_ERROR_SUCCESS;
179 }
180
CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt start,const CeedInt stop,const CeedInt num_elem,const CeedInt elem_size,const CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)181 static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
182 const CeedInt start, const CeedInt stop, const CeedInt num_elem,
183 const CeedInt elem_size, const CeedSize v_offset,
184 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
185 // No offsets provided, identity restriction
186 bool has_backend_strides;
187
188 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
189 if (has_backend_strides) {
190 // CPU backend strides are {1, elem_size, elem_size*num_comp}
191 // This if brach is left separate to allow better inlining
192 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
193 for (CeedSize k = 0; k < num_comp; k++) {
194 for (CeedSize n = 0; n < elem_size; n++) {
195 CeedPragmaSIMD for (CeedSize j = 0; j < CeedIntMin(block_size, num_elem - e); j++) {
196 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset];
197 }
198 }
199 }
200 }
201 } else {
202 // User provided strides
203 CeedInt strides[3];
204
205 CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides));
206 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
207 for (CeedSize k = 0; k < num_comp; k++) {
208 for (CeedSize n = 0; n < elem_size; n++) {
209 CeedPragmaSIMD for (CeedSize j = 0; j < CeedIntMin(block_size, num_elem - e); j++) {
210 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
211 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset];
212 }
213 }
214 }
215 }
216 }
217 return CEED_ERROR_SUCCESS;
218 }
219
CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,const CeedInt start,const CeedInt stop,const CeedInt num_elem,const CeedInt elem_size,const CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)220 static inline int CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
221 const CeedInt comp_stride, const CeedInt start, const CeedInt stop,
222 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset,
223 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
224 // Default restriction with offsets
225 CeedElemRestriction_Ref *impl;
226
227 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
228 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
229 for (CeedSize k = 0; k < num_comp; k++) {
230 for (CeedSize i = 0; i < elem_size * block_size; i += block_size) {
231 // Iteration bound set to discard padding elements
232 for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
233 CeedScalar vv_loc;
234
235 vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset];
236 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
237 }
238 }
239 }
240 }
241 return CEED_ERROR_SUCCESS;
242 }
243
CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,const CeedInt start,const CeedInt stop,const CeedInt num_elem,const CeedInt elem_size,const CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)244 static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
245 const CeedInt comp_stride, const CeedInt start, const CeedInt stop,
246 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset,
247 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
248 // Restriction with orientations
249 CeedElemRestriction_Ref *impl;
250
251 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
252 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
253 for (CeedSize k = 0; k < num_comp; k++) {
254 for (CeedSize i = 0; i < elem_size * block_size; i += block_size) {
255 // Iteration bound set to discard padding elements
256 for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
257 CeedScalar vv_loc;
258
259 vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0);
260 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
261 }
262 }
263 }
264 }
265 return CEED_ERROR_SUCCESS;
266 }
267
CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,const CeedInt start,const CeedInt stop,const CeedInt num_elem,const CeedInt elem_size,const CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)268 static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
269 const CeedInt comp_stride, const CeedInt start, const CeedInt stop,
270 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset,
271 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
272 // Restriction with tridiagonal transformation
273 CeedElemRestriction_Ref *impl;
274 CeedScalar vv_loc[block_size];
275
276 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
277 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
278 for (CeedSize k = 0; k < num_comp; k++) {
279 // Iteration bound set to discard padding elements
280 const CeedSize block_end = CeedIntMin(block_size, num_elem - e);
281 CeedSize n = 0;
282
283 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
284 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
285 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
286 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
287 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
288 }
289 for (CeedSize j = 0; j < block_end; j++) {
290 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
291 }
292 for (n = 1; n < elem_size - 1; n++) {
293 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
294 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
295 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
296 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
297 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
298 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
299 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
300 }
301 for (CeedSize j = 0; j < block_end; j++) {
302 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
303 }
304 }
305 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
306 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
307 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
308 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
309 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
310 }
311 for (CeedSize j = 0; j < block_end; j++) {
312 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
313 }
314 }
315 }
316 return CEED_ERROR_SUCCESS;
317 }
318
CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,const CeedInt start,const CeedInt stop,const CeedInt num_elem,const CeedInt elem_size,const CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)319 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp,
320 const CeedInt block_size, const CeedInt comp_stride,
321 const CeedInt start, const CeedInt stop, const CeedInt num_elem,
322 const CeedInt elem_size, const CeedSize v_offset,
323 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
324 // Restriction with (unsigned) tridiagonal transformation
325 CeedElemRestriction_Ref *impl;
326 CeedScalar vv_loc[block_size];
327
328 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
329 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
330 for (CeedSize k = 0; k < num_comp; k++) {
331 // Iteration bound set to discard padding elements
332 const CeedSize block_end = CeedIntMin(block_size, num_elem - e);
333 CeedSize n = 0;
334
335 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
336 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
337 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
338 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
339 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
340 }
341 for (CeedSize j = 0; j < block_end; j++) {
342 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
343 }
344 for (n = 1; n < elem_size - 1; n++) {
345 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
346 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
347 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
348 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
349 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
350 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
351 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
352 }
353 for (CeedSize j = 0; j < block_end; j++) {
354 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
355 }
356 }
357 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
358 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
359 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
360 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
361 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
362 }
363 for (CeedSize j = 0; j < block_end; j++) {
364 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
365 }
366 }
367 }
368 return CEED_ERROR_SUCCESS;
369 }
370
CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt start,const CeedInt stop,CeedTransposeMode t_mode,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)371 static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt start,
372 const CeedInt stop, CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu,
373 CeedScalar *__restrict__ vv) {
374 CeedInt num_points, l_vec_offset;
375 CeedSize e_vec_offset = 0;
376 CeedElemRestriction_Ref *impl;
377
378 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
379 for (CeedInt e = start; e < stop; e++) {
380 l_vec_offset = impl->offsets[e];
381 CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points));
382 if (t_mode == CEED_NOTRANSPOSE) {
383 for (CeedSize i = 0; i < num_points; i++) {
384 for (CeedSize j = 0; j < num_comp; j++) vv[j * num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j];
385 }
386 } else {
387 for (CeedSize i = 0; i < num_points; i++) {
388 for (CeedSize j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] += uu[j * num_points + i + e_vec_offset];
389 }
390 }
391 e_vec_offset += num_points * (CeedSize)num_comp;
392 }
393 return CEED_ERROR_SUCCESS;
394 }
395
CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,const CeedInt start,const CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)396 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
397 const CeedInt comp_stride, const CeedInt start, const CeedInt stop, CeedTransposeMode t_mode,
398 bool use_signs, bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) {
399 CeedInt num_elem, elem_size;
400 CeedSize v_offset = 0;
401 CeedRestrictionType rstr_type;
402 const CeedScalar *uu;
403 CeedScalar *vv;
404
405 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
406 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
407 v_offset = start * block_size * elem_size * (CeedSize)num_comp;
408 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
409 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
410
411 if (t_mode == CEED_TRANSPOSE) {
412 // Sum into for transpose mode, E-vector to L-vector
413 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
414 } else {
415 // Overwrite for notranspose mode, L-vector to E-vector
416 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
417 }
418 if (t_mode == CEED_TRANSPOSE) {
419 // Restriction from E-vector to L-vector
420 // Performing v += r^T * u
421 // uu has shape [elem_size, num_comp, num_elem], row-major
422 // vv has shape [nnodes, num_comp]
423 // Sum into for transpose mode
424 switch (rstr_type) {
425 case CEED_RESTRICTION_STRIDED:
426 CeedCallBackend(CeedElemRestrictionApplyStridedTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu,
427 vv));
428 break;
429 case CEED_RESTRICTION_STANDARD:
430 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
431 v_offset, uu, vv));
432 break;
433 case CEED_RESTRICTION_ORIENTED:
434 if (use_signs) {
435 CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
436 elem_size, v_offset, uu, vv));
437 } else {
438 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
439 v_offset, uu, vv));
440 }
441 break;
442 case CEED_RESTRICTION_CURL_ORIENTED:
443 if (use_signs && use_orients) {
444 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
445 elem_size, v_offset, uu, vv));
446 } else if (use_orients) {
447 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop,
448 num_elem, elem_size, v_offset, uu, vv));
449 } else {
450 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
451 v_offset, uu, vv));
452 }
453 break;
454 case CEED_RESTRICTION_POINTS:
455 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
456 break;
457 }
458 } else {
459 // Restriction from L-vector to E-vector
460 // Perform: v = r * u
461 // vv has shape [elem_size, num_comp, num_elem], row-major
462 // uu has shape [nnodes, num_comp]
463 // Overwrite for notranspose mode
464 switch (rstr_type) {
465 case CEED_RESTRICTION_STRIDED:
466 CeedCallBackend(CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset,
467 uu, vv));
468 break;
469 case CEED_RESTRICTION_STANDARD:
470 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
471 v_offset, uu, vv));
472 break;
473 case CEED_RESTRICTION_ORIENTED:
474 if (use_signs) {
475 CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
476 elem_size, v_offset, uu, vv));
477 } else {
478 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
479 elem_size, v_offset, uu, vv));
480 }
481 break;
482 case CEED_RESTRICTION_CURL_ORIENTED:
483 if (use_signs && use_orients) {
484 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
485 elem_size, v_offset, uu, vv));
486 } else if (use_orients) {
487 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop,
488 num_elem, elem_size, v_offset, uu, vv));
489 } else {
490 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
491 elem_size, v_offset, uu, vv));
492 }
493 break;
494 case CEED_RESTRICTION_POINTS:
495 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
496 break;
497 }
498 }
499 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
500 CeedCallBackend(CeedVectorRestoreArray(v, &vv));
501 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
502 return CEED_ERROR_SUCCESS;
503 }
504
505 //------------------------------------------------------------------------------
506 // ElemRestriction Apply - Common Sizes
507 //------------------------------------------------------------------------------
CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)508 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
509 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
510 CeedVector v, CeedRequest *request) {
511 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
512 }
513
CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)514 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
515 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
516 CeedVector v, CeedRequest *request) {
517 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
518 }
519
CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)520 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
521 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
522 CeedVector v, CeedRequest *request) {
523 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
524 }
525
CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)526 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
527 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
528 CeedVector v, CeedRequest *request) {
529 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
530 }
531
CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)532 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
533 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
534 CeedVector v, CeedRequest *request) {
535 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
536 }
537
CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)538 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
539 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
540 CeedVector v, CeedRequest *request) {
541 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
542 }
543
CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)544 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
545 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
546 CeedVector v, CeedRequest *request) {
547 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
548 }
549
CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)550 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
551 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
552 CeedVector v, CeedRequest *request) {
553 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
554 }
555
556 // LCOV_EXCL_START
CeedElemRestrictionApply_Ref_410(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)557 static int CeedElemRestrictionApply_Ref_410(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
558 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
559 CeedVector v, CeedRequest *request) {
560 return CeedElemRestrictionApply_Ref_Core(rstr, 4, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
561 }
562
CeedElemRestrictionApply_Ref_411(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)563 static int CeedElemRestrictionApply_Ref_411(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
564 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
565 CeedVector v, CeedRequest *request) {
566 return CeedElemRestrictionApply_Ref_Core(rstr, 4, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
567 }
568
CeedElemRestrictionApply_Ref_480(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)569 static int CeedElemRestrictionApply_Ref_480(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
570 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
571 CeedVector v, CeedRequest *request) {
572 return CeedElemRestrictionApply_Ref_Core(rstr, 4, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
573 }
574
CeedElemRestrictionApply_Ref_481(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)575 static int CeedElemRestrictionApply_Ref_481(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
576 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
577 CeedVector v, CeedRequest *request) {
578 return CeedElemRestrictionApply_Ref_Core(rstr, 4, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
579 }
580
CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)581 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
582 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
583 CeedVector v, CeedRequest *request) {
584 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
585 }
586 // LCOV_EXCL_STOP
587
CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)588 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
589 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
590 CeedVector v, CeedRequest *request) {
591 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
592 }
593
594 // LCOV_EXCL_START
CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)595 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
596 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
597 CeedVector v, CeedRequest *request) {
598 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
599 }
600 // LCOV_EXCL_STOP
601
CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)602 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
603 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
604 CeedVector v, CeedRequest *request) {
605 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
606 }
607
608 //------------------------------------------------------------------------------
609 // ElemRestriction Apply
610 //------------------------------------------------------------------------------
CeedElemRestrictionApply_Ref(CeedElemRestriction rstr,CeedTransposeMode t_mode,CeedVector u,CeedVector v,CeedRequest * request)611 static int CeedElemRestrictionApply_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
612 CeedInt num_block, block_size, num_comp, comp_stride;
613 CeedElemRestriction_Ref *impl;
614
615 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
616 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
617 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
618 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
619 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
620 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request));
621 return CEED_ERROR_SUCCESS;
622 }
623
624 //------------------------------------------------------------------------------
625 // ElemRestriction Apply Unsigned
626 //------------------------------------------------------------------------------
CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr,CeedTransposeMode t_mode,CeedVector u,CeedVector v,CeedRequest * request)627 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
628 CeedRequest *request) {
629 CeedInt num_block, block_size, num_comp, comp_stride;
630 CeedElemRestriction_Ref *impl;
631
632 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
633 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
634 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
635 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
636 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
637 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request));
638 return CEED_ERROR_SUCCESS;
639 }
640
641 //------------------------------------------------------------------------------
642 // ElemRestriction Apply Unoriented
643 //------------------------------------------------------------------------------
CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr,CeedTransposeMode t_mode,CeedVector u,CeedVector v,CeedRequest * request)644 static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
645 CeedRequest *request) {
646 CeedInt num_block, block_size, num_comp, comp_stride;
647 CeedElemRestriction_Ref *impl;
648
649 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
650 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
651 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
652 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
653 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
654 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request));
655 return CEED_ERROR_SUCCESS;
656 }
657
658 //------------------------------------------------------------------------------
659 // ElemRestriction Apply Points
660 //------------------------------------------------------------------------------
CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction rstr,CeedInt elem,CeedTransposeMode t_mode,CeedVector u,CeedVector v,CeedRequest * request)661 static int CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
662 CeedRequest *request) {
663 CeedInt num_comp;
664 CeedElemRestriction_Ref *impl;
665
666 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
667 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
668 return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request);
669 }
670
671 //------------------------------------------------------------------------------
672 // ElemRestriction Apply Block
673 //------------------------------------------------------------------------------
CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr,CeedInt block,CeedTransposeMode t_mode,CeedVector u,CeedVector v,CeedRequest * request)674 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
675 CeedRequest *request) {
676 CeedInt block_size, num_comp, comp_stride;
677 CeedElemRestriction_Ref *impl;
678
679 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
680 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
681 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
682 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
683 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request));
684 return CEED_ERROR_SUCCESS;
685 }
686
687 //------------------------------------------------------------------------------
688 // ElemRestriction Get Offsets
689 //------------------------------------------------------------------------------
CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr,CeedMemType mem_type,const CeedInt ** offsets)690 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
691 CeedElemRestriction_Ref *impl;
692
693 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
694
695 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
696
697 *offsets = impl->offsets;
698 return CEED_ERROR_SUCCESS;
699 }
700
701 //------------------------------------------------------------------------------
702 // ElemRestriction Get Orientations
703 //------------------------------------------------------------------------------
CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr,CeedMemType mem_type,const bool ** orients)704 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
705 CeedElemRestriction_Ref *impl;
706
707 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
708
709 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
710
711 *orients = impl->orients;
712 return CEED_ERROR_SUCCESS;
713 }
714
715 //------------------------------------------------------------------------------
716 // ElemRestriction Get Curl-Conforming Orientations
717 //------------------------------------------------------------------------------
CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr,CeedMemType mem_type,const CeedInt8 ** curl_orients)718 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) {
719 CeedElemRestriction_Ref *impl;
720
721 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
722
723 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
724
725 *curl_orients = impl->curl_orients;
726 return CEED_ERROR_SUCCESS;
727 }
728
729 //------------------------------------------------------------------------------
730 // ElemRestriction Destroy
731 //------------------------------------------------------------------------------
CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr)732 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr) {
733 CeedElemRestriction_Ref *impl;
734
735 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
736 CeedCallBackend(CeedFree(&impl->offsets_owned));
737 CeedCallBackend(CeedFree(&impl->orients_owned));
738 CeedCallBackend(CeedFree(&impl->curl_orients_owned));
739 CeedCallBackend(CeedFree(&impl));
740 return CEED_ERROR_SUCCESS;
741 }
742
743 //------------------------------------------------------------------------------
744 // ElemRestriction Create
745 //------------------------------------------------------------------------------
CeedElemRestrictionCreate_Ref(CeedMemType mem_type,CeedCopyMode copy_mode,const CeedInt * offsets,const bool * orients,const CeedInt8 * curl_orients,CeedElemRestriction rstr)746 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
747 const CeedInt8 *curl_orients, CeedElemRestriction rstr) {
748 Ceed ceed;
749 CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets;
750 CeedRestrictionType rstr_type;
751 CeedElemRestriction_Ref *impl;
752
753 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
754 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
755 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
756 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
757 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
758 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
759 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
760 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
761
762 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
763
764 CeedCallBackend(CeedCalloc(1, &impl));
765 CeedCallBackend(CeedElemRestrictionSetData(rstr, impl));
766
767 // Set layouts
768 {
769 bool has_backend_strides;
770 CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
771
772 CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout));
773 if (rstr_type == CEED_RESTRICTION_STRIDED) {
774 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
775 if (has_backend_strides) {
776 CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout));
777 }
778 }
779 }
780
781 // Expand E-vector size for AtPoints
782 if (rstr_type == CEED_RESTRICTION_POINTS) {
783 CeedSize max_points = 0, num_points_total = 0;
784
785 for (CeedInt i = 0; i < num_elem; i++) {
786 CeedInt num_points = offsets[i + 1] - offsets[i];
787
788 max_points = CeedIntMax(max_points, num_points);
789 num_points_total += num_points;
790 }
791 // -- Increase size for last element
792 num_points_total += (max_points - (offsets[num_elem] - offsets[num_elem - 1]));
793 CeedCallBackend(CeedElemRestrictionSetAtPointsEVectorSize(rstr, num_points_total * num_comp));
794 }
795
796 // Offsets data
797 if (rstr_type != CEED_RESTRICTION_STRIDED) {
798 const char *resource;
799
800 // Check indices for ref or memcheck backends
801 {
802 Ceed current = ceed, ceed_parent = NULL;
803
804 CeedCallBackend(CeedGetParent(current, &ceed_parent));
805 CeedCallBackend(CeedGetResource(ceed_parent, &resource));
806 CeedCallBackend(CeedDestroy(&ceed_parent));
807 }
808 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked")) {
809 CeedSize l_size;
810
811 CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
812 for (CeedInt i = 0; i < num_elem * elem_size; i++) {
813 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
814 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
815 }
816 }
817
818 // Copy data
819 if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points));
820 num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size);
821 CeedCallBackend(CeedSetHostCeedIntArray(offsets, copy_mode, num_offsets, &impl->offsets_owned, &impl->offsets_borrowed, &impl->offsets));
822
823 // Orientation data
824 if (rstr_type == CEED_RESTRICTION_ORIENTED) {
825 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction");
826 CeedCallBackend(CeedSetHostBoolArray(orients, copy_mode, num_offsets, &impl->orients_owned, &impl->orients_borrowed, &impl->orients));
827 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
828 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction");
829 CeedCallBackend(CeedSetHostCeedInt8Array(curl_orients, copy_mode, 3 * num_offsets, &impl->curl_orients_owned, &impl->curl_orients_borrowed,
830 &impl->curl_orients));
831 }
832 }
833
834 // Set apply function based upon num_comp, block_size, and comp_stride
835 CeedInt index = -1;
836
837 if (block_size < 10) index = 100 * num_comp + 10 * block_size + (comp_stride == 1);
838 switch (index) {
839 case 110:
840 impl->Apply = CeedElemRestrictionApply_Ref_110;
841 break;
842 case 111:
843 impl->Apply = CeedElemRestrictionApply_Ref_111;
844 break;
845 case 180:
846 impl->Apply = CeedElemRestrictionApply_Ref_180;
847 break;
848 case 181:
849 impl->Apply = CeedElemRestrictionApply_Ref_181;
850 break;
851 case 310:
852 impl->Apply = CeedElemRestrictionApply_Ref_310;
853 break;
854 case 311:
855 impl->Apply = CeedElemRestrictionApply_Ref_311;
856 break;
857 case 380:
858 impl->Apply = CeedElemRestrictionApply_Ref_380;
859 break;
860 case 381:
861 impl->Apply = CeedElemRestrictionApply_Ref_381;
862 break;
863 // LCOV_EXCL_START
864 case 410:
865 impl->Apply = CeedElemRestrictionApply_Ref_410;
866 break;
867 case 411:
868 impl->Apply = CeedElemRestrictionApply_Ref_411;
869 break;
870 case 480:
871 impl->Apply = CeedElemRestrictionApply_Ref_480;
872 break;
873 case 481:
874 impl->Apply = CeedElemRestrictionApply_Ref_481;
875 break;
876 case 510:
877 impl->Apply = CeedElemRestrictionApply_Ref_510;
878 break;
879 // LCOV_EXCL_STOP
880 case 511:
881 impl->Apply = CeedElemRestrictionApply_Ref_511;
882 break;
883 // LCOV_EXCL_START
884 case 580:
885 impl->Apply = CeedElemRestrictionApply_Ref_580;
886 break;
887 // LCOV_EXCL_STOP
888 case 581:
889 impl->Apply = CeedElemRestrictionApply_Ref_581;
890 break;
891 default:
892 impl->Apply = CeedElemRestrictionApply_Ref_Core;
893 break;
894 }
895
896 // Register backend functions
897 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Ref));
898 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref));
899 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref));
900 if (rstr_type == CEED_RESTRICTION_POINTS) {
901 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Ref));
902 }
903 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
904 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
905 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Ref));
906 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref));
907 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Ref));
908 CeedCallBackend(CeedDestroy(&ceed));
909 return CEED_ERROR_SUCCESS;
910 }
911
912 //------------------------------------------------------------------------------
913