1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED: http://github.com/ceed
7
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <stdbool.h>
11 #include <stdlib.h>
12 #include <string.h>
13
14 #include "ceed-memcheck.h"
15
16 //------------------------------------------------------------------------------
17 // Set backend strides
18 //------------------------------------------------------------------------------
CeedElemRestrictionGetBackendStrides_Memcheck(CeedElemRestriction rstr,CeedInt strides[3])19 static inline int CeedElemRestrictionGetBackendStrides_Memcheck(CeedElemRestriction rstr, CeedInt strides[3]) {
20 CeedInt elem_size, num_comp, num_elem;
21
22 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
23 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
24 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
25 // Memcheck default, contiguous by component, then node
26 strides[0] = num_comp;
27 strides[1] = 1;
28 strides[2] = num_comp * elem_size;
29 /**
30 // CPU default, contiguous by node, then component
31 strides[0] = 1;
32 strides[1] = elem_size;
33 strides[2] = elem_size * num_comp;
34
35 // GPU default, contiguous by node, then element
36 strides[0] = 1;
37 strides[1] = num_elem * elem_size;
38 strides[2] = elem_size;
39 **/
40 return CEED_ERROR_SUCCESS;
41 }
42
43 //------------------------------------------------------------------------------
44 // Core ElemRestriction Apply Code
45 //------------------------------------------------------------------------------
CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,CeedInt start,CeedInt stop,CeedInt num_elem,CeedInt elem_size,CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)46 static inline int CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
47 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
48 CeedSize v_offset, const CeedScalar *__restrict__ uu,
49 CeedScalar *__restrict__ vv) {
50 // Get strides
51 bool has_backend_strides;
52 CeedInt strides[3] = {0};
53
54 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
55 if (has_backend_strides) CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, strides));
56 else CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides));
57
58 // Apply restriction
59 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
60 for (CeedSize k = 0; k < num_comp; k++) {
61 for (CeedSize n = 0; n < elem_size; n++) {
62 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
63 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
64 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * (CeedSize)strides[2]];
65 }
66 }
67 }
68 }
69 return CEED_ERROR_SUCCESS;
70 }
71
CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedInt num_elem,CeedInt elem_size,CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)72 static inline int CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
73 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
74 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
75 CeedScalar *__restrict__ vv) {
76 // Default restriction with offsets
77 CeedElemRestriction_Memcheck *impl;
78
79 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
80 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
81 for (CeedSize k = 0; k < num_comp; k++) {
82 CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) {
83 vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
84 }
85 }
86 }
87 return CEED_ERROR_SUCCESS;
88 }
89
CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedInt num_elem,CeedInt elem_size,CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)90 static inline int CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp,
91 const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
92 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset,
93 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
94 // Restriction with orientations
95 CeedElemRestriction_Memcheck *impl;
96
97 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
98 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
99 for (CeedSize k = 0; k < num_comp; k++) {
100 CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) {
101 vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] =
102 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0);
103 }
104 }
105 }
106 return CEED_ERROR_SUCCESS;
107 }
108
CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedInt num_elem,CeedInt elem_size,CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)109 static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp,
110 const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
111 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset,
112 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
113 // Restriction with tridiagonal transformation
114 CeedElemRestriction_Memcheck *impl;
115
116 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
117 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
118 for (CeedSize k = 0; k < num_comp; k++) {
119 CeedSize n = 0;
120
121 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
122 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
123 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
124 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
125 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
126 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
127 }
128 for (n = 1; n < elem_size - 1; n++) {
129 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
130 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
131 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
132 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
133 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
134 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
135 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
136 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
137 }
138 }
139 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
140 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
141 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
142 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
143 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
144 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
145 }
146 }
147 }
148 return CEED_ERROR_SUCCESS;
149 }
150
CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedInt num_elem,CeedInt elem_size,CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)151 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core(
152 CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop,
153 CeedInt num_elem, CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
154 // Restriction with (unsigned) tridiagonal transformation
155 CeedElemRestriction_Memcheck *impl;
156
157 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
158 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
159 for (CeedSize k = 0; k < num_comp; k++) {
160 CeedSize n = 0;
161
162 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
163 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
164 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
165 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
166 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
167 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
168 }
169 for (n = 1; n < elem_size - 1; n++) {
170 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
171 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
172 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
173 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
174 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
175 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
176 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
177 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
178 }
179 }
180 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
181 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
182 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
183 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
184 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
185 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
186 }
187 }
188 }
189 return CEED_ERROR_SUCCESS;
190 }
191
CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,CeedInt start,CeedInt stop,CeedInt num_elem,CeedInt elem_size,CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)192 static inline int CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
193 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
194 CeedSize v_offset, const CeedScalar *__restrict__ uu,
195 CeedScalar *__restrict__ vv) {
196 // Get strides
197 bool has_backend_strides;
198 CeedInt strides[3] = {0};
199
200 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
201 if (has_backend_strides) CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, strides));
202 else CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides));
203
204 // Apply restriction
205 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
206 for (CeedSize k = 0; k < num_comp; k++) {
207 for (CeedSize n = 0; n < elem_size; n++) {
208 CeedPragmaSIMD for (CeedSize j = 0; j < CeedIntMin(block_size, num_elem - e); j++) {
209 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
210 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset];
211 }
212 }
213 }
214 }
215 return CEED_ERROR_SUCCESS;
216 }
217
CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedInt num_elem,CeedInt elem_size,CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)218 static inline int CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
219 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
220 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
221 CeedScalar *__restrict__ vv) {
222 // Default restriction with offsets
223 CeedElemRestriction_Memcheck *impl;
224
225 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
226 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
227 for (CeedSize k = 0; k < num_comp; k++) {
228 for (CeedSize i = 0; i < elem_size * block_size; i += block_size) {
229 // Iteration bound set to discard padding elements
230 for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
231 CeedScalar vv_loc;
232
233 vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset];
234 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
235 }
236 }
237 }
238 }
239 return CEED_ERROR_SUCCESS;
240 }
241
CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedInt num_elem,CeedInt elem_size,CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)242 static inline int CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
243 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
244 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
245 CeedScalar *__restrict__ vv) {
246 // Restriction with orientations
247 CeedElemRestriction_Memcheck *impl;
248
249 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
250 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
251 for (CeedSize k = 0; k < num_comp; k++) {
252 for (CeedSize i = 0; i < elem_size * block_size; i += block_size) {
253 // Iteration bound set to discard padding elements
254 for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
255 CeedScalar vv_loc;
256
257 vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0);
258 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
259 }
260 }
261 }
262 }
263 return CEED_ERROR_SUCCESS;
264 }
265
CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedInt num_elem,CeedInt elem_size,CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)266 static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp,
267 const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
268 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset,
269 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
270 // Restriction with tridiagonal transformation
271 CeedElemRestriction_Memcheck *impl;
272 CeedScalar vv_loc[block_size];
273
274 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
275 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
276 for (CeedSize k = 0; k < num_comp; k++) {
277 // Iteration bound set to discard padding elements
278 const CeedSize block_end = CeedIntMin(block_size, num_elem - e);
279 CeedSize n = 0;
280
281 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
282 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
283 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
284 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
285 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
286 }
287 for (CeedSize j = 0; j < block_end; j++) {
288 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
289 }
290 for (n = 1; n < elem_size - 1; n++) {
291 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
292 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
293 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
294 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
295 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
296 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
297 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
298 }
299 for (CeedSize j = 0; j < block_end; j++) {
300 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
301 }
302 }
303 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
304 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
305 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
306 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
307 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
308 }
309 for (CeedSize j = 0; j < block_end; j++) {
310 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
311 }
312 }
313 }
314 return CEED_ERROR_SUCCESS;
315 }
316
CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedInt num_elem,CeedInt elem_size,CeedSize v_offset,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)317 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core(
318 CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop,
319 CeedInt num_elem, CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
320 // Restriction with (unsigned) tridiagonal transformation
321 CeedElemRestriction_Memcheck *impl;
322 CeedScalar vv_loc[block_size];
323
324 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
325 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
326 for (CeedSize k = 0; k < num_comp; k++) {
327 // Iteration bound set to discard padding elements
328 const CeedSize block_end = CeedIntMin(block_size, num_elem - e);
329 CeedSize n = 0;
330
331 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
332 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
333 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
334 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
335 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
336 }
337 for (CeedSize j = 0; j < block_end; j++) {
338 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
339 }
340 for (n = 1; n < elem_size - 1; n++) {
341 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
342 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
343 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
344 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
345 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
346 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
347 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
348 }
349 for (CeedSize j = 0; j < block_end; j++) {
350 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
351 }
352 }
353 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
354 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
355 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
356 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
357 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
358 }
359 for (CeedSize j = 0; j < block_end; j++) {
360 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
361 }
362 }
363 }
364 return CEED_ERROR_SUCCESS;
365 }
366
CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,const CeedScalar * __restrict__ uu,CeedScalar * __restrict__ vv)367 static inline int CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start,
368 CeedInt stop, CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu,
369 CeedScalar *__restrict__ vv) {
370 CeedInt num_points, l_vec_offset;
371 CeedSize e_vec_offset = 0;
372 CeedElemRestriction_Memcheck *impl;
373
374 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
375 for (CeedSize e = start; e < stop; e++) {
376 l_vec_offset = impl->offsets[e];
377 CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points));
378 if (t_mode == CEED_NOTRANSPOSE) {
379 for (CeedSize i = 0; i < num_points; i++) {
380 for (CeedSize j = 0; j < num_comp; j++) vv[j * num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j];
381 }
382 } else {
383 for (CeedSize i = 0; i < num_points; i++) {
384 for (CeedSize j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] += uu[j * num_points + i + e_vec_offset];
385 }
386 }
387 e_vec_offset += num_points * (CeedSize)num_comp;
388 }
389 return CEED_ERROR_SUCCESS;
390 }
391
CeedElemRestrictionApply_Memcheck_Core(CeedElemRestriction rstr,const CeedInt num_comp,const CeedInt block_size,const CeedInt comp_stride,CeedInt start,CeedInt stop,CeedTransposeMode t_mode,bool use_signs,bool use_orients,CeedVector u,CeedVector v,CeedRequest * request)392 static inline int CeedElemRestrictionApply_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
393 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode,
394 bool use_signs, bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) {
395 CeedInt num_elem, elem_size;
396 CeedSize v_offset;
397 CeedRestrictionType rstr_type;
398 const CeedScalar *uu;
399 CeedScalar *vv;
400
401 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
402 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
403 v_offset = start * block_size * elem_size * (CeedSize)num_comp;
404 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
405 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
406
407 if (t_mode == CEED_TRANSPOSE) {
408 // Sum into for transpose mode, E-vector to L-vector
409 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
410 } else {
411 // Overwrite for notranspose mode, L-vector to E-vector
412 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
413 }
414
415 if (t_mode == CEED_TRANSPOSE) {
416 // Restriction from E-vector to L-vector
417 // Performing v += r^T * u
418 // uu has shape [elem_size, num_comp, num_elem], row-major
419 // vv has shape [nnodes, num_comp]
420 // Sum into for transpose mode
421 switch (rstr_type) {
422 case CEED_RESTRICTION_STRIDED:
423 CeedCallBackend(CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset,
424 uu, vv));
425 break;
426 case CEED_RESTRICTION_STANDARD:
427 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
428 elem_size, v_offset, uu, vv));
429 break;
430 case CEED_RESTRICTION_ORIENTED:
431 if (use_signs) {
432 CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
433 elem_size, v_offset, uu, vv));
434 } else {
435 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
436 elem_size, v_offset, uu, vv));
437 }
438 break;
439 case CEED_RESTRICTION_CURL_ORIENTED:
440 if (use_signs && use_orients) {
441 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
442 elem_size, v_offset, uu, vv));
443 } else if (use_orients) {
444 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop,
445 num_elem, elem_size, v_offset, uu, vv));
446 } else {
447 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
448 elem_size, v_offset, uu, vv));
449 }
450 break;
451 case CEED_RESTRICTION_POINTS:
452 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
453 break;
454 }
455 } else {
456 // Restriction from L-vector to E-vector
457 // Perform: v = r * u
458 // vv has shape [elem_size, num_comp, num_elem], row-major
459 // uu has shape [nnodes, num_comp]
460 // Overwrite for notranspose mode
461 switch (rstr_type) {
462 case CEED_RESTRICTION_STRIDED:
463 CeedCallBackend(CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size,
464 v_offset, uu, vv));
465 break;
466 case CEED_RESTRICTION_STANDARD:
467 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
468 elem_size, v_offset, uu, vv));
469 break;
470 case CEED_RESTRICTION_ORIENTED:
471 if (use_signs) {
472 CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
473 elem_size, v_offset, uu, vv));
474 } else {
475 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
476 elem_size, v_offset, uu, vv));
477 }
478 break;
479 case CEED_RESTRICTION_CURL_ORIENTED:
480 if (use_signs && use_orients) {
481 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop,
482 num_elem, elem_size, v_offset, uu, vv));
483 } else if (use_orients) {
484 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop,
485 num_elem, elem_size, v_offset, uu, vv));
486 } else {
487 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
488 elem_size, v_offset, uu, vv));
489 }
490 break;
491 case CEED_RESTRICTION_POINTS:
492 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
493 break;
494 }
495 }
496 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
497 CeedCallBackend(CeedVectorRestoreArray(v, &vv));
498 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
499 return CEED_ERROR_SUCCESS;
500 }
501
502 //------------------------------------------------------------------------------
503 // ElemRestriction Apply
504 //------------------------------------------------------------------------------
CeedElemRestrictionApply_Memcheck(CeedElemRestriction rstr,CeedTransposeMode t_mode,CeedVector u,CeedVector v,CeedRequest * request)505 static int CeedElemRestrictionApply_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
506 CeedInt num_block, block_size, num_comp, comp_stride;
507 CeedElemRestriction_Memcheck *impl;
508
509 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
510 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
511 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
512 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
513 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
514 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request));
515 return CEED_ERROR_SUCCESS;
516 }
517
518 //------------------------------------------------------------------------------
519 // ElemRestriction Apply Unsigned
520 //------------------------------------------------------------------------------
CeedElemRestrictionApplyUnsigned_Memcheck(CeedElemRestriction rstr,CeedTransposeMode t_mode,CeedVector u,CeedVector v,CeedRequest * request)521 static int CeedElemRestrictionApplyUnsigned_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
522 CeedRequest *request) {
523 CeedInt num_block, block_size, num_comp, comp_stride;
524 CeedElemRestriction_Memcheck *impl;
525
526 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
527 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
528 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
529 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
530 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
531 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request));
532 return CEED_ERROR_SUCCESS;
533 }
534
535 //------------------------------------------------------------------------------
536 // ElemRestriction Apply Unoriented
537 //------------------------------------------------------------------------------
CeedElemRestrictionApplyUnoriented_Memcheck(CeedElemRestriction rstr,CeedTransposeMode t_mode,CeedVector u,CeedVector v,CeedRequest * request)538 static int CeedElemRestrictionApplyUnoriented_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
539 CeedRequest *request) {
540 CeedInt num_block, block_size, num_comp, comp_stride;
541 CeedElemRestriction_Memcheck *impl;
542
543 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
544 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
545 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
546 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
547 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
548 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request));
549 return CEED_ERROR_SUCCESS;
550 }
551
552 //------------------------------------------------------------------------------
553 // ElemRestriction Apply Points
554 //------------------------------------------------------------------------------
CeedElemRestrictionApplyAtPointsInElement_Memcheck(CeedElemRestriction rstr,CeedInt elem,CeedTransposeMode t_mode,CeedVector u,CeedVector v,CeedRequest * request)555 static int CeedElemRestrictionApplyAtPointsInElement_Memcheck(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u,
556 CeedVector v, CeedRequest *request) {
557 CeedInt num_comp;
558 CeedElemRestriction_Memcheck *impl;
559
560 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
561 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
562 return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request);
563 }
564
565 //------------------------------------------------------------------------------
566 // ElemRestriction Apply Block
567 //------------------------------------------------------------------------------
CeedElemRestrictionApplyBlock_Memcheck(CeedElemRestriction rstr,CeedInt block,CeedTransposeMode t_mode,CeedVector u,CeedVector v,CeedRequest * request)568 static int CeedElemRestrictionApplyBlock_Memcheck(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
569 CeedRequest *request) {
570 CeedInt block_size, num_comp, comp_stride;
571 CeedElemRestriction_Memcheck *impl;
572
573 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
574 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
575 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
576 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
577 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request));
578 return CEED_ERROR_SUCCESS;
579 }
580
581 //------------------------------------------------------------------------------
582 // ElemRestriction Get Offsets
583 //------------------------------------------------------------------------------
CeedElemRestrictionGetOffsets_Memcheck(CeedElemRestriction rstr,CeedMemType mem_type,const CeedInt ** offsets)584 static int CeedElemRestrictionGetOffsets_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
585 CeedElemRestriction_Memcheck *impl;
586
587 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
588
589 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
590
591 *offsets = impl->offsets;
592 return CEED_ERROR_SUCCESS;
593 }
594
595 //------------------------------------------------------------------------------
596 // ElemRestriction Get Orientations
597 //------------------------------------------------------------------------------
CeedElemRestrictionGetOrientations_Memcheck(CeedElemRestriction rstr,CeedMemType mem_type,const bool ** orients)598 static int CeedElemRestrictionGetOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
599 CeedElemRestriction_Memcheck *impl;
600
601 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
602
603 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
604
605 *orients = impl->orients;
606 return CEED_ERROR_SUCCESS;
607 }
608
609 //------------------------------------------------------------------------------
610 // ElemRestriction Get Curl-Conforming Orientations
611 //------------------------------------------------------------------------------
CeedElemRestrictionGetCurlOrientations_Memcheck(CeedElemRestriction rstr,CeedMemType mem_type,const CeedInt8 ** curl_orients)612 static int CeedElemRestrictionGetCurlOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) {
613 CeedElemRestriction_Memcheck *impl;
614
615 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
616
617 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
618
619 *curl_orients = impl->curl_orients;
620 return CEED_ERROR_SUCCESS;
621 }
622
623 //------------------------------------------------------------------------------
624 // ElemRestriction Destroy
625 //------------------------------------------------------------------------------
CeedElemRestrictionDestroy_Memcheck(CeedElemRestriction rstr)626 static int CeedElemRestrictionDestroy_Memcheck(CeedElemRestriction rstr) {
627 CeedElemRestriction_Memcheck *impl;
628
629 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
630 CeedCallBackend(CeedFree(&impl->offsets_allocated));
631 CeedCallBackend(CeedFree(&impl->orients_allocated));
632 CeedCallBackend(CeedFree(&impl->curl_orients_allocated));
633 CeedCallBackend(CeedFree(&impl));
634 return CEED_ERROR_SUCCESS;
635 }
636
637 //------------------------------------------------------------------------------
638 // ElemRestriction Create
639 //------------------------------------------------------------------------------
CeedElemRestrictionCreate_Memcheck(CeedMemType mem_type,CeedCopyMode copy_mode,const CeedInt * offsets,const bool * orients,const CeedInt8 * curl_orients,CeedElemRestriction rstr)640 int CeedElemRestrictionCreate_Memcheck(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
641 const CeedInt8 *curl_orients, CeedElemRestriction rstr) {
642 Ceed ceed;
643 CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets;
644 CeedRestrictionType rstr_type;
645 CeedElemRestriction_Memcheck *impl;
646
647 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
648 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
649 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
650 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
651 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
652 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
653 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
654 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
655
656 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
657
658 CeedCallBackend(CeedCalloc(1, &impl));
659 CeedCallBackend(CeedElemRestrictionSetData(rstr, impl));
660
661 // Set layouts
662 {
663 bool has_backend_strides;
664 CeedInt e_layout[3] = {1, elem_size, elem_size * num_comp}, l_layout[3] = {0};
665
666 CeedCallBackend(CeedElemRestrictionSetELayout(rstr, e_layout));
667 if (rstr_type == CEED_RESTRICTION_STRIDED) {
668 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
669 if (has_backend_strides) {
670 CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, l_layout));
671 CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, l_layout));
672 }
673 }
674 }
675
676 // Expand E-vector size for AtPoints
677 if (rstr_type == CEED_RESTRICTION_POINTS) {
678 CeedSize max_points = 0, num_points_total = 0;
679
680 for (CeedInt i = 0; i < num_elem; i++) {
681 CeedInt num_points = offsets[i + 1] - offsets[i];
682
683 max_points = CeedIntMax(max_points, num_points);
684 num_points_total += num_points;
685 }
686 // -- Increase size for last element
687 num_points_total += (max_points - (offsets[num_elem] - offsets[num_elem - 1]));
688 CeedCallBackend(CeedElemRestrictionSetAtPointsEVectorSize(rstr, num_points_total * num_comp));
689 }
690
691 // Offsets data
692 if (rstr_type != CEED_RESTRICTION_STRIDED) {
693 // Check indices
694 {
695 CeedSize l_size;
696
697 CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
698 for (CeedInt i = 0; i < num_elem * elem_size; i++) {
699 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
700 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
701 }
702 }
703
704 // Copy data
705 if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points));
706 num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size);
707 switch (copy_mode) {
708 case CEED_COPY_VALUES:
709 CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated));
710 memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0]));
711 impl->offsets = impl->offsets_allocated;
712 break;
713 case CEED_OWN_POINTER:
714 impl->offsets_allocated = (CeedInt *)offsets;
715 impl->offsets = impl->offsets_allocated;
716 break;
717 case CEED_USE_POINTER:
718 impl->offsets = offsets;
719 }
720
721 // Orientation data
722 if (rstr_type == CEED_RESTRICTION_ORIENTED) {
723 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction");
724 switch (copy_mode) {
725 case CEED_COPY_VALUES:
726 CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated));
727 memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0]));
728 impl->orients = impl->orients_allocated;
729 break;
730 case CEED_OWN_POINTER:
731 impl->orients_allocated = (bool *)orients;
732 impl->orients = impl->orients_allocated;
733 break;
734 case CEED_USE_POINTER:
735 impl->orients = orients;
736 }
737 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
738 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction");
739 switch (copy_mode) {
740 case CEED_COPY_VALUES:
741 CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated));
742 memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0]));
743 impl->curl_orients = impl->curl_orients_allocated;
744 break;
745 case CEED_OWN_POINTER:
746 impl->curl_orients_allocated = (CeedInt8 *)curl_orients;
747 impl->curl_orients = impl->curl_orients_allocated;
748 break;
749 case CEED_USE_POINTER:
750 impl->curl_orients = curl_orients;
751 }
752 }
753 }
754
755 // Set apply function
756 impl->Apply = CeedElemRestrictionApply_Memcheck_Core;
757
758 // Register backend functions
759 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Memcheck));
760 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Memcheck));
761 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Memcheck));
762 if (rstr_type == CEED_RESTRICTION_POINTS) {
763 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement",
764 CeedElemRestrictionApplyAtPointsInElement_Memcheck));
765 }
766 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Memcheck));
767 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Memcheck));
768 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Memcheck));
769 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Memcheck));
770 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Memcheck));
771 CeedCallBackend(CeedDestroy(&ceed));
772 return CEED_ERROR_SUCCESS;
773 }
774
775 //------------------------------------------------------------------------------
776