xref: /libCEED/rust/libceed-sys/c-src/backends/memcheck/ceed-memcheck-restriction.c (revision 9e82028b7a110649b446e985dc29425c820314d7)
1*9e82028bSJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2*9e82028bSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3*9e82028bSJeremy L Thompson //
4*9e82028bSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
5*9e82028bSJeremy L Thompson //
6*9e82028bSJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
7*9e82028bSJeremy L Thompson 
8*9e82028bSJeremy L Thompson #include <ceed.h>
9*9e82028bSJeremy L Thompson #include <ceed/backend.h>
10*9e82028bSJeremy L Thompson #include <stdbool.h>
11*9e82028bSJeremy L Thompson #include <stdlib.h>
12*9e82028bSJeremy L Thompson #include <string.h>
13*9e82028bSJeremy L Thompson 
14*9e82028bSJeremy L Thompson #include "ceed-memcheck.h"
15*9e82028bSJeremy L Thompson 
16*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
17*9e82028bSJeremy L Thompson // Set backend strides
18*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
19*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionGetBackendStrides_Memcheck(CeedElemRestriction rstr, CeedInt strides[3]) {
20*9e82028bSJeremy L Thompson   CeedInt elem_size, num_comp, num_elem;
21*9e82028bSJeremy L Thompson 
22*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
23*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
24*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
25*9e82028bSJeremy L Thompson   // Memcheck default, contiguous by component, then node
26*9e82028bSJeremy L Thompson   strides[0] = num_comp;
27*9e82028bSJeremy L Thompson   strides[1] = 1;
28*9e82028bSJeremy L Thompson   strides[2] = num_comp * elem_size;
29*9e82028bSJeremy L Thompson   /**
30*9e82028bSJeremy L Thompson       // CPU default, contiguous by node, then component
31*9e82028bSJeremy L Thompson       strides[0] = 1;
32*9e82028bSJeremy L Thompson       strides[1] = elem_size;
33*9e82028bSJeremy L Thompson       strides[2] = elem_size * num_comp;
34*9e82028bSJeremy L Thompson 
35*9e82028bSJeremy L Thompson       // GPU default, contiguous by node, then element
36*9e82028bSJeremy L Thompson       strides[0] = 1;
37*9e82028bSJeremy L Thompson       strides[1] = num_elem * elem_size;
38*9e82028bSJeremy L Thompson       strides[2] = elem_size;
39*9e82028bSJeremy L Thompson      **/
40*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
41*9e82028bSJeremy L Thompson }
42*9e82028bSJeremy L Thompson 
43*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
44*9e82028bSJeremy L Thompson // Core ElemRestriction Apply Code
45*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
46*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
47*9e82028bSJeremy L Thompson                                                                            CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
48*9e82028bSJeremy L Thompson                                                                            CeedInt v_offset, const CeedScalar *__restrict__ uu,
49*9e82028bSJeremy L Thompson                                                                            CeedScalar *__restrict__ vv) {
50*9e82028bSJeremy L Thompson   // Get strides
51*9e82028bSJeremy L Thompson   bool    has_backend_strides;
52*9e82028bSJeremy L Thompson   CeedInt strides[3] = {0};
53*9e82028bSJeremy L Thompson 
54*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
55*9e82028bSJeremy L Thompson   if (has_backend_strides) CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, strides));
56*9e82028bSJeremy L Thompson   else CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides));
57*9e82028bSJeremy L Thompson 
58*9e82028bSJeremy L Thompson   // Apply restriction
59*9e82028bSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
60*9e82028bSJeremy L Thompson     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
61*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
62*9e82028bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
63*9e82028bSJeremy L Thompson           vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
64*9e82028bSJeremy L Thompson               uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]];
65*9e82028bSJeremy L Thompson         }
66*9e82028bSJeremy L Thompson       }
67*9e82028bSJeremy L Thompson     }
68*9e82028bSJeremy L Thompson   }
69*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
70*9e82028bSJeremy L Thompson }
71*9e82028bSJeremy L Thompson 
72*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
73*9e82028bSJeremy L Thompson                                                                           const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
74*9e82028bSJeremy L Thompson                                                                           CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu,
75*9e82028bSJeremy L Thompson                                                                           CeedScalar *__restrict__ vv) {
76*9e82028bSJeremy L Thompson   // Default restriction with offsets
77*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
78*9e82028bSJeremy L Thompson 
79*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
80*9e82028bSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
81*9e82028bSJeremy L Thompson     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
82*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) {
83*9e82028bSJeremy L Thompson         vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
84*9e82028bSJeremy L Thompson       }
85*9e82028bSJeremy L Thompson     }
86*9e82028bSJeremy L Thompson   }
87*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
88*9e82028bSJeremy L Thompson }
89*9e82028bSJeremy L Thompson 
90*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp,
91*9e82028bSJeremy L Thompson                                                                             const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
92*9e82028bSJeremy L Thompson                                                                             CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset,
93*9e82028bSJeremy L Thompson                                                                             const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
94*9e82028bSJeremy L Thompson   // Restriction with orientations
95*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
96*9e82028bSJeremy L Thompson 
97*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
98*9e82028bSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
99*9e82028bSJeremy L Thompson     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
100*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) {
101*9e82028bSJeremy L Thompson         vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] =
102*9e82028bSJeremy L Thompson             uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0);
103*9e82028bSJeremy L Thompson       }
104*9e82028bSJeremy L Thompson     }
105*9e82028bSJeremy L Thompson   }
106*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
107*9e82028bSJeremy L Thompson }
108*9e82028bSJeremy L Thompson 
109*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp,
110*9e82028bSJeremy L Thompson                                                                                 const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
111*9e82028bSJeremy L Thompson                                                                                 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset,
112*9e82028bSJeremy L Thompson                                                                                 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
113*9e82028bSJeremy L Thompson   // Restriction with tridiagonal transformation
114*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
115*9e82028bSJeremy L Thompson 
116*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
117*9e82028bSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
118*9e82028bSJeremy L Thompson     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
119*9e82028bSJeremy L Thompson       CeedInt n = 0;
120*9e82028bSJeremy L Thompson 
121*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
122*9e82028bSJeremy L Thompson         vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
123*9e82028bSJeremy L Thompson             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
124*9e82028bSJeremy L Thompson                 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
125*9e82028bSJeremy L Thompson             uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
126*9e82028bSJeremy L Thompson                 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
127*9e82028bSJeremy L Thompson       }
128*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) {
129*9e82028bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
130*9e82028bSJeremy L Thompson           vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
131*9e82028bSJeremy L Thompson               uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
132*9e82028bSJeremy L Thompson                   impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
133*9e82028bSJeremy L Thompson               uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
134*9e82028bSJeremy L Thompson                   impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
135*9e82028bSJeremy L Thompson               uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
136*9e82028bSJeremy L Thompson                   impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
137*9e82028bSJeremy L Thompson         }
138*9e82028bSJeremy L Thompson       }
139*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
140*9e82028bSJeremy L Thompson         vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
141*9e82028bSJeremy L Thompson             uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
142*9e82028bSJeremy L Thompson                 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
143*9e82028bSJeremy L Thompson             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
144*9e82028bSJeremy L Thompson                 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
145*9e82028bSJeremy L Thompson       }
146*9e82028bSJeremy L Thompson     }
147*9e82028bSJeremy L Thompson   }
148*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
149*9e82028bSJeremy L Thompson }
150*9e82028bSJeremy L Thompson 
151*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core(
152*9e82028bSJeremy L Thompson     CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop,
153*9e82028bSJeremy L Thompson     CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
154*9e82028bSJeremy L Thompson   // Restriction with (unsigned) tridiagonal transformation
155*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
156*9e82028bSJeremy L Thompson 
157*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
158*9e82028bSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
159*9e82028bSJeremy L Thompson     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
160*9e82028bSJeremy L Thompson       CeedInt n = 0;
161*9e82028bSJeremy L Thompson 
162*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
163*9e82028bSJeremy L Thompson         vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
164*9e82028bSJeremy L Thompson             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
165*9e82028bSJeremy L Thompson                 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
166*9e82028bSJeremy L Thompson             uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
167*9e82028bSJeremy L Thompson                 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
168*9e82028bSJeremy L Thompson       }
169*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) {
170*9e82028bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
171*9e82028bSJeremy L Thompson           vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
172*9e82028bSJeremy L Thompson               uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
173*9e82028bSJeremy L Thompson                   abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
174*9e82028bSJeremy L Thompson               uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
175*9e82028bSJeremy L Thompson                   abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
176*9e82028bSJeremy L Thompson               uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
177*9e82028bSJeremy L Thompson                   abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
178*9e82028bSJeremy L Thompson         }
179*9e82028bSJeremy L Thompson       }
180*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
181*9e82028bSJeremy L Thompson         vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
182*9e82028bSJeremy L Thompson             uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
183*9e82028bSJeremy L Thompson                 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
184*9e82028bSJeremy L Thompson             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
185*9e82028bSJeremy L Thompson                 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
186*9e82028bSJeremy L Thompson       }
187*9e82028bSJeremy L Thompson     }
188*9e82028bSJeremy L Thompson   }
189*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
190*9e82028bSJeremy L Thompson }
191*9e82028bSJeremy L Thompson 
192*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
193*9e82028bSJeremy L Thompson                                                                          CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
194*9e82028bSJeremy L Thompson                                                                          CeedInt v_offset, const CeedScalar *__restrict__ uu,
195*9e82028bSJeremy L Thompson                                                                          CeedScalar *__restrict__ vv) {
196*9e82028bSJeremy L Thompson   // Get strides
197*9e82028bSJeremy L Thompson   bool    has_backend_strides;
198*9e82028bSJeremy L Thompson   CeedInt strides[3] = {0};
199*9e82028bSJeremy L Thompson 
200*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
201*9e82028bSJeremy L Thompson   if (has_backend_strides) CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, strides));
202*9e82028bSJeremy L Thompson   else CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides));
203*9e82028bSJeremy L Thompson 
204*9e82028bSJeremy L Thompson   // Apply restriction
205*9e82028bSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
206*9e82028bSJeremy L Thompson     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
207*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
208*9e82028bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) {
209*9e82028bSJeremy L Thompson           vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
210*9e82028bSJeremy L Thompson               uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset];
211*9e82028bSJeremy L Thompson         }
212*9e82028bSJeremy L Thompson       }
213*9e82028bSJeremy L Thompson     }
214*9e82028bSJeremy L Thompson   }
215*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
216*9e82028bSJeremy L Thompson }
217*9e82028bSJeremy L Thompson 
218*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
219*9e82028bSJeremy L Thompson                                                                         const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
220*9e82028bSJeremy L Thompson                                                                         CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu,
221*9e82028bSJeremy L Thompson                                                                         CeedScalar *__restrict__ vv) {
222*9e82028bSJeremy L Thompson   // Default restriction with offsets
223*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
224*9e82028bSJeremy L Thompson 
225*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
226*9e82028bSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
227*9e82028bSJeremy L Thompson     for (CeedInt k = 0; k < num_comp; k++) {
228*9e82028bSJeremy L Thompson       for (CeedInt i = 0; i < elem_size * block_size; i += block_size) {
229*9e82028bSJeremy L Thompson         // Iteration bound set to discard padding elements
230*9e82028bSJeremy L Thompson         for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
231*9e82028bSJeremy L Thompson           CeedScalar vv_loc;
232*9e82028bSJeremy L Thompson 
233*9e82028bSJeremy L Thompson           vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset];
234*9e82028bSJeremy L Thompson           CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
235*9e82028bSJeremy L Thompson         }
236*9e82028bSJeremy L Thompson       }
237*9e82028bSJeremy L Thompson     }
238*9e82028bSJeremy L Thompson   }
239*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
240*9e82028bSJeremy L Thompson }
241*9e82028bSJeremy L Thompson 
242*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
243*9e82028bSJeremy L Thompson                                                                           const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
244*9e82028bSJeremy L Thompson                                                                           CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu,
245*9e82028bSJeremy L Thompson                                                                           CeedScalar *__restrict__ vv) {
246*9e82028bSJeremy L Thompson   // Restriction with orientations
247*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
248*9e82028bSJeremy L Thompson 
249*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
250*9e82028bSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
251*9e82028bSJeremy L Thompson     for (CeedInt k = 0; k < num_comp; k++) {
252*9e82028bSJeremy L Thompson       for (CeedInt i = 0; i < elem_size * block_size; i += block_size) {
253*9e82028bSJeremy L Thompson         // Iteration bound set to discard padding elements
254*9e82028bSJeremy L Thompson         for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
255*9e82028bSJeremy L Thompson           CeedScalar vv_loc;
256*9e82028bSJeremy L Thompson 
257*9e82028bSJeremy L Thompson           vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0);
258*9e82028bSJeremy L Thompson           CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
259*9e82028bSJeremy L Thompson         }
260*9e82028bSJeremy L Thompson       }
261*9e82028bSJeremy L Thompson     }
262*9e82028bSJeremy L Thompson   }
263*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
264*9e82028bSJeremy L Thompson }
265*9e82028bSJeremy L Thompson 
266*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp,
267*9e82028bSJeremy L Thompson                                                                               const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
268*9e82028bSJeremy L Thompson                                                                               CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset,
269*9e82028bSJeremy L Thompson                                                                               const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
270*9e82028bSJeremy L Thompson   // Restriction with tridiagonal transformation
271*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
272*9e82028bSJeremy L Thompson   CeedScalar                    vv_loc[block_size];
273*9e82028bSJeremy L Thompson 
274*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
275*9e82028bSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
276*9e82028bSJeremy L Thompson     for (CeedInt k = 0; k < num_comp; k++) {
277*9e82028bSJeremy L Thompson       // Iteration bound set to discard padding elements
278*9e82028bSJeremy L Thompson       const CeedInt block_end = CeedIntMin(block_size, num_elem - e);
279*9e82028bSJeremy L Thompson       CeedInt       n         = 0;
280*9e82028bSJeremy L Thompson 
281*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
282*9e82028bSJeremy L Thompson         vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
283*9e82028bSJeremy L Thompson                         impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
284*9e82028bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
285*9e82028bSJeremy L Thompson                         impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
286*9e82028bSJeremy L Thompson       }
287*9e82028bSJeremy L Thompson       for (CeedInt j = 0; j < block_end; j++) {
288*9e82028bSJeremy L Thompson         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
289*9e82028bSJeremy L Thompson       }
290*9e82028bSJeremy L Thompson       for (n = 1; n < elem_size - 1; n++) {
291*9e82028bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
292*9e82028bSJeremy L Thompson           vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
293*9e82028bSJeremy L Thompson                           impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
294*9e82028bSJeremy L Thompson                       uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
295*9e82028bSJeremy L Thompson                           impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
296*9e82028bSJeremy L Thompson                       uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
297*9e82028bSJeremy L Thompson                           impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
298*9e82028bSJeremy L Thompson         }
299*9e82028bSJeremy L Thompson         for (CeedInt j = 0; j < block_end; j++) {
300*9e82028bSJeremy L Thompson           CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
301*9e82028bSJeremy L Thompson         }
302*9e82028bSJeremy L Thompson       }
303*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
304*9e82028bSJeremy L Thompson         vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
305*9e82028bSJeremy L Thompson                         impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
306*9e82028bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
307*9e82028bSJeremy L Thompson                         impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
308*9e82028bSJeremy L Thompson       }
309*9e82028bSJeremy L Thompson       for (CeedInt j = 0; j < block_end; j++) {
310*9e82028bSJeremy L Thompson         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
311*9e82028bSJeremy L Thompson       }
312*9e82028bSJeremy L Thompson     }
313*9e82028bSJeremy L Thompson   }
314*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
315*9e82028bSJeremy L Thompson }
316*9e82028bSJeremy L Thompson 
317*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core(
318*9e82028bSJeremy L Thompson     CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop,
319*9e82028bSJeremy L Thompson     CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
320*9e82028bSJeremy L Thompson   // Restriction with (unsigned) tridiagonal transformation
321*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
322*9e82028bSJeremy L Thompson   CeedScalar                    vv_loc[block_size];
323*9e82028bSJeremy L Thompson 
324*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
325*9e82028bSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
326*9e82028bSJeremy L Thompson     for (CeedInt k = 0; k < num_comp; k++) {
327*9e82028bSJeremy L Thompson       // Iteration bound set to discard padding elements
328*9e82028bSJeremy L Thompson       const CeedInt block_end = CeedIntMin(block_size, num_elem - e);
329*9e82028bSJeremy L Thompson       CeedInt       n         = 0;
330*9e82028bSJeremy L Thompson 
331*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
332*9e82028bSJeremy L Thompson         vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
333*9e82028bSJeremy L Thompson                         abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
334*9e82028bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
335*9e82028bSJeremy L Thompson                         abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
336*9e82028bSJeremy L Thompson       }
337*9e82028bSJeremy L Thompson       for (CeedInt j = 0; j < block_end; j++) {
338*9e82028bSJeremy L Thompson         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
339*9e82028bSJeremy L Thompson       }
340*9e82028bSJeremy L Thompson       for (n = 1; n < elem_size - 1; n++) {
341*9e82028bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
342*9e82028bSJeremy L Thompson           vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
343*9e82028bSJeremy L Thompson                           abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
344*9e82028bSJeremy L Thompson                       uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
345*9e82028bSJeremy L Thompson                           abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
346*9e82028bSJeremy L Thompson                       uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
347*9e82028bSJeremy L Thompson                           abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
348*9e82028bSJeremy L Thompson         }
349*9e82028bSJeremy L Thompson         for (CeedInt j = 0; j < block_end; j++) {
350*9e82028bSJeremy L Thompson           CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
351*9e82028bSJeremy L Thompson         }
352*9e82028bSJeremy L Thompson       }
353*9e82028bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
354*9e82028bSJeremy L Thompson         vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
355*9e82028bSJeremy L Thompson                         abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
356*9e82028bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
357*9e82028bSJeremy L Thompson                         abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
358*9e82028bSJeremy L Thompson       }
359*9e82028bSJeremy L Thompson       for (CeedInt j = 0; j < block_end; j++) {
360*9e82028bSJeremy L Thompson         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
361*9e82028bSJeremy L Thompson       }
362*9e82028bSJeremy L Thompson     }
363*9e82028bSJeremy L Thompson   }
364*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
365*9e82028bSJeremy L Thompson }
366*9e82028bSJeremy L Thompson 
367*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start,
368*9e82028bSJeremy L Thompson                                                                           CeedInt stop, CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu,
369*9e82028bSJeremy L Thompson                                                                           CeedScalar *__restrict__ vv) {
370*9e82028bSJeremy L Thompson   CeedInt                       num_points, l_vec_offset, e_vec_offset = 0;
371*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
372*9e82028bSJeremy L Thompson 
373*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
374*9e82028bSJeremy L Thompson   for (CeedInt e = start; e < stop; e++) {
375*9e82028bSJeremy L Thompson     l_vec_offset = impl->offsets[e];
376*9e82028bSJeremy L Thompson     CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points));
377*9e82028bSJeremy L Thompson     if (t_mode == CEED_NOTRANSPOSE) {
378*9e82028bSJeremy L Thompson       for (CeedInt i = 0; i < num_points; i++) {
379*9e82028bSJeremy L Thompson         for (CeedInt j = 0; j < num_comp; j++) vv[j * num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j];
380*9e82028bSJeremy L Thompson       }
381*9e82028bSJeremy L Thompson     } else {
382*9e82028bSJeremy L Thompson       for (CeedInt i = 0; i < num_points; i++) {
383*9e82028bSJeremy L Thompson         for (CeedInt j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] = uu[j * num_points + i + e_vec_offset];
384*9e82028bSJeremy L Thompson       }
385*9e82028bSJeremy L Thompson     }
386*9e82028bSJeremy L Thompson     e_vec_offset += num_points * num_comp;
387*9e82028bSJeremy L Thompson   }
388*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
389*9e82028bSJeremy L Thompson }
390*9e82028bSJeremy L Thompson 
391*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApply_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
392*9e82028bSJeremy L Thompson                                                          const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode,
393*9e82028bSJeremy L Thompson                                                          bool use_signs, bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) {
394*9e82028bSJeremy L Thompson   CeedInt             num_elem, elem_size, v_offset;
395*9e82028bSJeremy L Thompson   CeedRestrictionType rstr_type;
396*9e82028bSJeremy L Thompson   const CeedScalar   *uu;
397*9e82028bSJeremy L Thompson   CeedScalar         *vv;
398*9e82028bSJeremy L Thompson 
399*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
400*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
401*9e82028bSJeremy L Thompson   v_offset = start * block_size * elem_size * num_comp;
402*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
403*9e82028bSJeremy L Thompson   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
404*9e82028bSJeremy L Thompson 
405*9e82028bSJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
406*9e82028bSJeremy L Thompson     // Sum into for transpose mode, E-vector to L-vector
407*9e82028bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
408*9e82028bSJeremy L Thompson   } else {
409*9e82028bSJeremy L Thompson     // Overwrite for notranspose mode, L-vector to E-vector
410*9e82028bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
411*9e82028bSJeremy L Thompson   }
412*9e82028bSJeremy L Thompson 
413*9e82028bSJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
414*9e82028bSJeremy L Thompson     // Restriction from E-vector to L-vector
415*9e82028bSJeremy L Thompson     // Performing v += r^T * u
416*9e82028bSJeremy L Thompson     // uu has shape [elem_size, num_comp, num_elem], row-major
417*9e82028bSJeremy L Thompson     // vv has shape [nnodes, num_comp]
418*9e82028bSJeremy L Thompson     // Sum into for transpose mode
419*9e82028bSJeremy L Thompson     switch (rstr_type) {
420*9e82028bSJeremy L Thompson       case CEED_RESTRICTION_STRIDED:
421*9e82028bSJeremy L Thompson         CeedCallBackend(
422*9e82028bSJeremy L Thompson             CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv));
423*9e82028bSJeremy L Thompson         break;
424*9e82028bSJeremy L Thompson       case CEED_RESTRICTION_STANDARD:
425*9e82028bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
426*9e82028bSJeremy L Thompson                                                                               elem_size, v_offset, uu, vv));
427*9e82028bSJeremy L Thompson         break;
428*9e82028bSJeremy L Thompson       case CEED_RESTRICTION_ORIENTED:
429*9e82028bSJeremy L Thompson         if (use_signs) {
430*9e82028bSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
431*9e82028bSJeremy L Thompson                                                                                   elem_size, v_offset, uu, vv));
432*9e82028bSJeremy L Thompson         } else {
433*9e82028bSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
434*9e82028bSJeremy L Thompson                                                                                 elem_size, v_offset, uu, vv));
435*9e82028bSJeremy L Thompson         }
436*9e82028bSJeremy L Thompson         break;
437*9e82028bSJeremy L Thompson       case CEED_RESTRICTION_CURL_ORIENTED:
438*9e82028bSJeremy L Thompson         if (use_signs && use_orients) {
439*9e82028bSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
440*9e82028bSJeremy L Thompson                                                                                       elem_size, v_offset, uu, vv));
441*9e82028bSJeremy L Thompson         } else if (use_orients) {
442*9e82028bSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop,
443*9e82028bSJeremy L Thompson                                                                                               num_elem, elem_size, v_offset, uu, vv));
444*9e82028bSJeremy L Thompson         } else {
445*9e82028bSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
446*9e82028bSJeremy L Thompson                                                                                 elem_size, v_offset, uu, vv));
447*9e82028bSJeremy L Thompson         }
448*9e82028bSJeremy L Thompson         break;
449*9e82028bSJeremy L Thompson       case CEED_RESTRICTION_POINTS:
450*9e82028bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
451*9e82028bSJeremy L Thompson         break;
452*9e82028bSJeremy L Thompson     }
453*9e82028bSJeremy L Thompson   } else {
454*9e82028bSJeremy L Thompson     // Restriction from L-vector to E-vector
455*9e82028bSJeremy L Thompson     // Perform: v = r * u
456*9e82028bSJeremy L Thompson     // vv has shape [elem_size, num_comp, num_elem], row-major
457*9e82028bSJeremy L Thompson     // uu has shape [nnodes, num_comp]
458*9e82028bSJeremy L Thompson     // Overwrite for notranspose mode
459*9e82028bSJeremy L Thompson     switch (rstr_type) {
460*9e82028bSJeremy L Thompson       case CEED_RESTRICTION_STRIDED:
461*9e82028bSJeremy L Thompson         CeedCallBackend(
462*9e82028bSJeremy L Thompson             CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv));
463*9e82028bSJeremy L Thompson         break;
464*9e82028bSJeremy L Thompson       case CEED_RESTRICTION_STANDARD:
465*9e82028bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
466*9e82028bSJeremy L Thompson                                                                                 elem_size, v_offset, uu, vv));
467*9e82028bSJeremy L Thompson         break;
468*9e82028bSJeremy L Thompson       case CEED_RESTRICTION_ORIENTED:
469*9e82028bSJeremy L Thompson         if (use_signs) {
470*9e82028bSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
471*9e82028bSJeremy L Thompson                                                                                     elem_size, v_offset, uu, vv));
472*9e82028bSJeremy L Thompson         } else {
473*9e82028bSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
474*9e82028bSJeremy L Thompson                                                                                   elem_size, v_offset, uu, vv));
475*9e82028bSJeremy L Thompson         }
476*9e82028bSJeremy L Thompson         break;
477*9e82028bSJeremy L Thompson       case CEED_RESTRICTION_CURL_ORIENTED:
478*9e82028bSJeremy L Thompson         if (use_signs && use_orients) {
479*9e82028bSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop,
480*9e82028bSJeremy L Thompson                                                                                         num_elem, elem_size, v_offset, uu, vv));
481*9e82028bSJeremy L Thompson         } else if (use_orients) {
482*9e82028bSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop,
483*9e82028bSJeremy L Thompson                                                                                                 num_elem, elem_size, v_offset, uu, vv));
484*9e82028bSJeremy L Thompson         } else {
485*9e82028bSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
486*9e82028bSJeremy L Thompson                                                                                   elem_size, v_offset, uu, vv));
487*9e82028bSJeremy L Thompson         }
488*9e82028bSJeremy L Thompson         break;
489*9e82028bSJeremy L Thompson       case CEED_RESTRICTION_POINTS:
490*9e82028bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
491*9e82028bSJeremy L Thompson         break;
492*9e82028bSJeremy L Thompson     }
493*9e82028bSJeremy L Thompson   }
494*9e82028bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
495*9e82028bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
496*9e82028bSJeremy L Thompson   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
497*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
498*9e82028bSJeremy L Thompson }
499*9e82028bSJeremy L Thompson 
500*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
501*9e82028bSJeremy L Thompson // ElemRestriction Apply
502*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
503*9e82028bSJeremy L Thompson static int CeedElemRestrictionApply_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
504*9e82028bSJeremy L Thompson   CeedInt                       num_block, block_size, num_comp, comp_stride;
505*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
506*9e82028bSJeremy L Thompson 
507*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
508*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
509*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
510*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
511*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
512*9e82028bSJeremy L Thompson   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request));
513*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
514*9e82028bSJeremy L Thompson }
515*9e82028bSJeremy L Thompson 
516*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
517*9e82028bSJeremy L Thompson // ElemRestriction Apply Unsigned
518*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
519*9e82028bSJeremy L Thompson static int CeedElemRestrictionApplyUnsigned_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
520*9e82028bSJeremy L Thompson                                                      CeedRequest *request) {
521*9e82028bSJeremy L Thompson   CeedInt                       num_block, block_size, num_comp, comp_stride;
522*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
523*9e82028bSJeremy L Thompson 
524*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
525*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
526*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
527*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
528*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
529*9e82028bSJeremy L Thompson   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request));
530*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
531*9e82028bSJeremy L Thompson }
532*9e82028bSJeremy L Thompson 
533*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
534*9e82028bSJeremy L Thompson // ElemRestriction Apply Unoriented
535*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
536*9e82028bSJeremy L Thompson static int CeedElemRestrictionApplyUnoriented_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
537*9e82028bSJeremy L Thompson                                                        CeedRequest *request) {
538*9e82028bSJeremy L Thompson   CeedInt                       num_block, block_size, num_comp, comp_stride;
539*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
540*9e82028bSJeremy L Thompson 
541*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
542*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
543*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
544*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
545*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
546*9e82028bSJeremy L Thompson   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request));
547*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
548*9e82028bSJeremy L Thompson }
549*9e82028bSJeremy L Thompson 
550*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
551*9e82028bSJeremy L Thompson // ElemRestriction Apply Points
552*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
553*9e82028bSJeremy L Thompson static int CeedElemRestrictionApplyAtPointsInElement_Memcheck(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u,
554*9e82028bSJeremy L Thompson                                                               CeedVector v, CeedRequest *request) {
555*9e82028bSJeremy L Thompson   CeedInt                       num_comp;
556*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
557*9e82028bSJeremy L Thompson 
558*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
559*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
560*9e82028bSJeremy L Thompson   return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request);
561*9e82028bSJeremy L Thompson }
562*9e82028bSJeremy L Thompson 
563*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
564*9e82028bSJeremy L Thompson // ElemRestriction Apply Block
565*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
566*9e82028bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Memcheck(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
567*9e82028bSJeremy L Thompson                                                   CeedRequest *request) {
568*9e82028bSJeremy L Thompson   CeedInt                       block_size, num_comp, comp_stride;
569*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
570*9e82028bSJeremy L Thompson 
571*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
572*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
573*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
574*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
575*9e82028bSJeremy L Thompson   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request));
576*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
577*9e82028bSJeremy L Thompson }
578*9e82028bSJeremy L Thompson 
579*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
580*9e82028bSJeremy L Thompson // ElemRestriction Get Offsets
581*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
582*9e82028bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
583*9e82028bSJeremy L Thompson   Ceed                          ceed;
584*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
585*9e82028bSJeremy L Thompson 
586*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
587*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
588*9e82028bSJeremy L Thompson 
589*9e82028bSJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
590*9e82028bSJeremy L Thompson 
591*9e82028bSJeremy L Thompson   *offsets = impl->offsets;
592*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
593*9e82028bSJeremy L Thompson }
594*9e82028bSJeremy L Thompson 
595*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
596*9e82028bSJeremy L Thompson // ElemRestriction Get Orientations
597*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
598*9e82028bSJeremy L Thompson static int CeedElemRestrictionGetOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
599*9e82028bSJeremy L Thompson   Ceed                          ceed;
600*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
601*9e82028bSJeremy L Thompson 
602*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
603*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
604*9e82028bSJeremy L Thompson 
605*9e82028bSJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
606*9e82028bSJeremy L Thompson 
607*9e82028bSJeremy L Thompson   *orients = impl->orients;
608*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
609*9e82028bSJeremy L Thompson }
610*9e82028bSJeremy L Thompson 
611*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
612*9e82028bSJeremy L Thompson // ElemRestriction Get Curl-Conforming Orientations
613*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
614*9e82028bSJeremy L Thompson static int CeedElemRestrictionGetCurlOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) {
615*9e82028bSJeremy L Thompson   Ceed                          ceed;
616*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
617*9e82028bSJeremy L Thompson 
618*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
619*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
620*9e82028bSJeremy L Thompson 
621*9e82028bSJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
622*9e82028bSJeremy L Thompson 
623*9e82028bSJeremy L Thompson   *curl_orients = impl->curl_orients;
624*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
625*9e82028bSJeremy L Thompson }
626*9e82028bSJeremy L Thompson 
627*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
628*9e82028bSJeremy L Thompson // ElemRestriction Destroy
629*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
630*9e82028bSJeremy L Thompson static int CeedElemRestrictionDestroy_Memcheck(CeedElemRestriction rstr) {
631*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
632*9e82028bSJeremy L Thompson 
633*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
634*9e82028bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->offsets_allocated));
635*9e82028bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->orients_allocated));
636*9e82028bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->curl_orients_allocated));
637*9e82028bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
638*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
639*9e82028bSJeremy L Thompson }
640*9e82028bSJeremy L Thompson 
641*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
642*9e82028bSJeremy L Thompson // ElemRestriction Create
643*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
644*9e82028bSJeremy L Thompson int CeedElemRestrictionCreate_Memcheck(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
645*9e82028bSJeremy L Thompson                                        const CeedInt8 *curl_orients, CeedElemRestriction rstr) {
646*9e82028bSJeremy L Thompson   Ceed                          ceed;
647*9e82028bSJeremy L Thompson   CeedInt                       num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets;
648*9e82028bSJeremy L Thompson   CeedRestrictionType           rstr_type;
649*9e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
650*9e82028bSJeremy L Thompson 
651*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
652*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
653*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
654*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
655*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
656*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
657*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
658*9e82028bSJeremy L Thompson   CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
659*9e82028bSJeremy L Thompson 
660*9e82028bSJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
661*9e82028bSJeremy L Thompson 
662*9e82028bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &impl));
663*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetData(rstr, impl));
664*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout));
665*9e82028bSJeremy L Thompson 
666*9e82028bSJeremy L Thompson   // Offsets data
667*9e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
668*9e82028bSJeremy L Thompson   if (rstr_type != CEED_RESTRICTION_STRIDED) {
669*9e82028bSJeremy L Thompson     const char *resource;
670*9e82028bSJeremy L Thompson 
671*9e82028bSJeremy L Thompson     // Check indices for ref or memcheck backends
672*9e82028bSJeremy L Thompson     {
673*9e82028bSJeremy L Thompson       Ceed current = ceed, parent = NULL;
674*9e82028bSJeremy L Thompson 
675*9e82028bSJeremy L Thompson       CeedCallBackend(CeedGetParent(current, &parent));
676*9e82028bSJeremy L Thompson       while (current != parent) {
677*9e82028bSJeremy L Thompson         current = parent;
678*9e82028bSJeremy L Thompson         CeedCallBackend(CeedGetParent(current, &parent));
679*9e82028bSJeremy L Thompson       }
680*9e82028bSJeremy L Thompson       CeedCallBackend(CeedGetResource(parent, &resource));
681*9e82028bSJeremy L Thompson     }
682*9e82028bSJeremy L Thompson     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") ||
683*9e82028bSJeremy L Thompson         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
684*9e82028bSJeremy L Thompson       CeedSize l_size;
685*9e82028bSJeremy L Thompson 
686*9e82028bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
687*9e82028bSJeremy L Thompson       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
688*9e82028bSJeremy L Thompson         CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
689*9e82028bSJeremy L Thompson                   "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
690*9e82028bSJeremy L Thompson       }
691*9e82028bSJeremy L Thompson     }
692*9e82028bSJeremy L Thompson 
693*9e82028bSJeremy L Thompson     // Copy data
694*9e82028bSJeremy L Thompson     if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points));
695*9e82028bSJeremy L Thompson     num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size);
696*9e82028bSJeremy L Thompson     switch (copy_mode) {
697*9e82028bSJeremy L Thompson       case CEED_COPY_VALUES:
698*9e82028bSJeremy L Thompson         CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated));
699*9e82028bSJeremy L Thompson         memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0]));
700*9e82028bSJeremy L Thompson         impl->offsets = impl->offsets_allocated;
701*9e82028bSJeremy L Thompson         break;
702*9e82028bSJeremy L Thompson       case CEED_OWN_POINTER:
703*9e82028bSJeremy L Thompson         impl->offsets_allocated = (CeedInt *)offsets;
704*9e82028bSJeremy L Thompson         impl->offsets           = impl->offsets_allocated;
705*9e82028bSJeremy L Thompson         break;
706*9e82028bSJeremy L Thompson       case CEED_USE_POINTER:
707*9e82028bSJeremy L Thompson         impl->offsets = offsets;
708*9e82028bSJeremy L Thompson     }
709*9e82028bSJeremy L Thompson 
710*9e82028bSJeremy L Thompson     // Orientation data
711*9e82028bSJeremy L Thompson     if (rstr_type == CEED_RESTRICTION_ORIENTED) {
712*9e82028bSJeremy L Thompson       CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction");
713*9e82028bSJeremy L Thompson       switch (copy_mode) {
714*9e82028bSJeremy L Thompson         case CEED_COPY_VALUES:
715*9e82028bSJeremy L Thompson           CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated));
716*9e82028bSJeremy L Thompson           memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0]));
717*9e82028bSJeremy L Thompson           impl->orients = impl->orients_allocated;
718*9e82028bSJeremy L Thompson           break;
719*9e82028bSJeremy L Thompson         case CEED_OWN_POINTER:
720*9e82028bSJeremy L Thompson           impl->orients_allocated = (bool *)orients;
721*9e82028bSJeremy L Thompson           impl->orients           = impl->orients_allocated;
722*9e82028bSJeremy L Thompson           break;
723*9e82028bSJeremy L Thompson         case CEED_USE_POINTER:
724*9e82028bSJeremy L Thompson           impl->orients = orients;
725*9e82028bSJeremy L Thompson       }
726*9e82028bSJeremy L Thompson     } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
727*9e82028bSJeremy L Thompson       CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction");
728*9e82028bSJeremy L Thompson       switch (copy_mode) {
729*9e82028bSJeremy L Thompson         case CEED_COPY_VALUES:
730*9e82028bSJeremy L Thompson           CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated));
731*9e82028bSJeremy L Thompson           memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0]));
732*9e82028bSJeremy L Thompson           impl->curl_orients = impl->curl_orients_allocated;
733*9e82028bSJeremy L Thompson           break;
734*9e82028bSJeremy L Thompson         case CEED_OWN_POINTER:
735*9e82028bSJeremy L Thompson           impl->curl_orients_allocated = (CeedInt8 *)curl_orients;
736*9e82028bSJeremy L Thompson           impl->curl_orients           = impl->curl_orients_allocated;
737*9e82028bSJeremy L Thompson           break;
738*9e82028bSJeremy L Thompson         case CEED_USE_POINTER:
739*9e82028bSJeremy L Thompson           impl->curl_orients = curl_orients;
740*9e82028bSJeremy L Thompson       }
741*9e82028bSJeremy L Thompson     }
742*9e82028bSJeremy L Thompson   }
743*9e82028bSJeremy L Thompson 
744*9e82028bSJeremy L Thompson   // Set apply function
745*9e82028bSJeremy L Thompson   impl->Apply = CeedElemRestrictionApply_Memcheck_Core;
746*9e82028bSJeremy L Thompson 
747*9e82028bSJeremy L Thompson   // Register backend functions
748*9e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Memcheck));
749*9e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Memcheck));
750*9e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Memcheck));
751*9e82028bSJeremy L Thompson   if (rstr_type == CEED_RESTRICTION_POINTS) {
752*9e82028bSJeremy L Thompson     CeedCallBackend(
753*9e82028bSJeremy L Thompson         CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Memcheck));
754*9e82028bSJeremy L Thompson   }
755*9e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Memcheck));
756*9e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Memcheck));
757*9e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Memcheck));
758*9e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Memcheck));
759*9e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Memcheck));
760*9e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
761*9e82028bSJeremy L Thompson }
762*9e82028bSJeremy L Thompson 
763*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------
764