xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision 171d97d0be1a6a7597730b8cc2bec09fb6a98aac)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <stdbool.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include "ceed-ref.h"
15 
16 //------------------------------------------------------------------------------
17 // Core ElemRestriction Apply Code
18 //------------------------------------------------------------------------------
19 static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
20                                                                       CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
21                                                                       CeedSize v_offset, const CeedScalar *__restrict__ uu,
22                                                                       CeedScalar *__restrict__ vv) {
23   // No offsets provided, identity restriction
24   bool has_backend_strides;
25 
26   CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
27   if (has_backend_strides) {
28     // CPU backend strides are {1, elem_size, elem_size*num_comp}
29     // This if branch is left separate to allow better inlining
30     for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
31       CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
32         CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
33           CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
34             vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
35                 uu[n + k * (CeedSize)elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * (CeedSize)num_comp];
36           }
37         }
38       }
39     }
40   } else {
41     // User provided strides
42     CeedInt strides[3];
43 
44     CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides));
45     for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
46       CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
47         CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
48           CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
49             vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
50                 uu[n * (CeedSize)strides[0] + k * (CeedSize)strides[1] + CeedIntMin(e + j, num_elem - 1) * (CeedSize)strides[2]];
51           }
52         }
53       }
54     }
55   }
56   return CEED_ERROR_SUCCESS;
57 }
58 
59 static inline int CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
60                                                                      const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
61                                                                      CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
62                                                                      CeedScalar *__restrict__ vv) {
63   // Default restriction with offsets
64   CeedElemRestriction_Ref *impl;
65 
66   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
67   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
68     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
69       CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) {
70         vv[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
71       }
72     }
73   }
74   return CEED_ERROR_SUCCESS;
75 }
76 
77 static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
78                                                                        const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
79                                                                        CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
80                                                                        CeedScalar *__restrict__ vv) {
81   // Restriction with orientations
82   CeedElemRestriction_Ref *impl;
83 
84   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
85   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
86     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
87       CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) {
88         vv[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + i - v_offset] =
89             uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0);
90       }
91     }
92   }
93   return CEED_ERROR_SUCCESS;
94 }
95 
96 static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
97                                                                            const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
98                                                                            CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
99                                                                            CeedScalar *__restrict__ vv) {
100   // Restriction with tridiagonal transformation
101   CeedElemRestriction_Ref *impl;
102 
103   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
104   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
105     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
106       CeedInt n = 0;
107 
108       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
109         vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
110             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
111                 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
112             uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
113                 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
114       }
115       CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) {
116         CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
117           vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
118               uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
119                   impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
120               uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
121                   impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
122               uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
123                   impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
124         }
125       }
126       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
127         vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
128             uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
129                 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
130             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
131                 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
132       }
133     }
134   }
135   return CEED_ERROR_SUCCESS;
136 }
137 
138 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp,
139                                                                                    const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
140                                                                                    CeedInt stop, CeedInt num_elem, CeedInt elem_size,
141                                                                                    CeedSize v_offset, const CeedScalar *__restrict__ uu,
142                                                                                    CeedScalar *__restrict__ vv) {
143   // Restriction with (unsigned) tridiagonal transformation
144   CeedElemRestriction_Ref *impl;
145 
146   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
147   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
148     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
149       CeedInt n = 0;
150 
151       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
152         vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
153             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
154                 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
155             uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
156                 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
157       }
158       CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) {
159         CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
160           vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
161               uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
162                   abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
163               uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
164                   abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
165               uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
166                   abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
167         }
168       }
169       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
170         vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
171             uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
172                 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
173             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
174                 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
175       }
176     }
177   }
178   return CEED_ERROR_SUCCESS;
179 }
180 
181 static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
182                                                                     CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
183                                                                     CeedSize v_offset, const CeedScalar *__restrict__ uu,
184                                                                     CeedScalar *__restrict__ vv) {
185   // No offsets provided, identity restriction
186   bool has_backend_strides;
187 
188   CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
189   if (has_backend_strides) {
190     // CPU backend strides are {1, elem_size, elem_size*num_comp}
191     // This if brach is left separate to allow better inlining
192     for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
193       CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
194         CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
195           CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) {
196             vv[n + k * (CeedSize)elem_size + (e + j) * elem_size * (CeedSize)num_comp] +=
197                 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset];
198           }
199         }
200       }
201     }
202   } else {
203     // User provided strides
204     CeedInt strides[3];
205 
206     CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides));
207     for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
208       CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
209         CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
210           CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) {
211             vv[n * (CeedSize)strides[0] + k * (CeedSize)strides[1] + (e + j) * (CeedSize)strides[2]] +=
212                 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset];
213           }
214         }
215       }
216     }
217   }
218   return CEED_ERROR_SUCCESS;
219 }
220 
221 static inline int CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
222                                                                    const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
223                                                                    CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
224                                                                    CeedScalar *__restrict__ vv) {
225   // Default restriction with offsets
226   CeedElemRestriction_Ref *impl;
227 
228   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
229   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
230     for (CeedInt k = 0; k < num_comp; k++) {
231       for (CeedInt i = 0; i < elem_size * block_size; i += block_size) {
232         // Iteration bound set to discard padding elements
233         for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
234           CeedScalar vv_loc;
235 
236           vv_loc = uu[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + j - v_offset];
237           CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
238         }
239       }
240     }
241   }
242   return CEED_ERROR_SUCCESS;
243 }
244 
245 static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
246                                                                      const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
247                                                                      CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
248                                                                      CeedScalar *__restrict__ vv) {
249   // Restriction with orientations
250   CeedElemRestriction_Ref *impl;
251 
252   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
253   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
254     for (CeedInt k = 0; k < num_comp; k++) {
255       for (CeedInt i = 0; i < elem_size * block_size; i += block_size) {
256         // Iteration bound set to discard padding elements
257         for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
258           CeedScalar vv_loc;
259 
260           vv_loc =
261               uu[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0);
262           CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
263         }
264       }
265     }
266   }
267   return CEED_ERROR_SUCCESS;
268 }
269 
270 static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
271                                                                          const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
272                                                                          CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
273                                                                          CeedScalar *__restrict__ vv) {
274   // Restriction with tridiagonal transformation
275   CeedElemRestriction_Ref *impl;
276   CeedScalar               vv_loc[block_size];
277 
278   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
279   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
280     for (CeedInt k = 0; k < num_comp; k++) {
281       // Iteration bound set to discard padding elements
282       const CeedInt block_end = CeedIntMin(block_size, num_elem - e);
283       CeedInt       n         = 0;
284 
285       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
286         vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
287                         impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
288                     uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] *
289                         impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
290       }
291       for (CeedInt j = 0; j < block_end; j++) {
292         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
293       }
294       for (n = 1; n < elem_size - 1; n++) {
295         CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
296           vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] *
297                           impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
298                       uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
299                           impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
300                       uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] *
301                           impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
302         }
303         for (CeedInt j = 0; j < block_end; j++) {
304           CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
305         }
306       }
307       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
308         vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] *
309                         impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
310                     uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
311                         impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
312       }
313       for (CeedInt j = 0; j < block_end; j++) {
314         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
315       }
316     }
317   }
318   return CEED_ERROR_SUCCESS;
319 }
320 
321 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp,
322                                                                                  const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
323                                                                                  CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset,
324                                                                                  const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
325   // Restriction with (unsigned) tridiagonal transformation
326   CeedElemRestriction_Ref *impl;
327   CeedScalar               vv_loc[block_size];
328 
329   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
330   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
331     for (CeedInt k = 0; k < num_comp; k++) {
332       // Iteration bound set to discard padding elements
333       const CeedInt block_end = CeedIntMin(block_size, num_elem - e);
334       CeedInt       n         = 0;
335 
336       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
337         vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
338                         abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
339                     uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] *
340                         abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
341       }
342       for (CeedInt j = 0; j < block_end; j++) {
343         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
344       }
345       for (n = 1; n < elem_size - 1; n++) {
346         CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
347           vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] *
348                           abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
349                       uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
350                           abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
351                       uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] *
352                           abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
353         }
354         for (CeedInt j = 0; j < block_end; j++) {
355           CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
356         }
357       }
358       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
359         vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] *
360                         abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
361                     uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
362                         abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
363       }
364       for (CeedInt j = 0; j < block_end; j++) {
365         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
366       }
367     }
368   }
369   return CEED_ERROR_SUCCESS;
370 }
371 
372 static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, CeedInt stop,
373                                                                      CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu,
374                                                                      CeedScalar *__restrict__ vv) {
375   CeedInt                  num_points, l_vec_offset;
376   CeedSize                 e_vec_offset = 0;
377   CeedElemRestriction_Ref *impl;
378 
379   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
380   for (CeedInt e = start; e < stop; e++) {
381     l_vec_offset = impl->offsets[e];
382     CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points));
383     if (t_mode == CEED_NOTRANSPOSE) {
384       for (CeedInt i = 0; i < num_points; i++) {
385         for (CeedInt j = 0; j < num_comp; j++) vv[j * (CeedSize)num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j];
386       }
387     } else {
388       for (CeedInt i = 0; i < num_points; i++) {
389         for (CeedInt j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] = uu[j * (CeedSize)num_points + i + e_vec_offset];
390       }
391     }
392     e_vec_offset += num_points * (CeedSize)num_comp;
393   }
394   return CEED_ERROR_SUCCESS;
395 }
396 
397 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
398                                                     const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs,
399                                                     bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) {
400   CeedInt             num_elem, elem_size;
401   CeedSize            v_offset = 0;
402   CeedRestrictionType rstr_type;
403   const CeedScalar   *uu;
404   CeedScalar         *vv;
405 
406   CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
407   CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
408   v_offset = start * block_size * elem_size * (CeedSize)num_comp;
409   CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
410   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
411 
412   if (t_mode == CEED_TRANSPOSE) {
413     // Sum into for transpose mode, E-vector to L-vector
414     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
415   } else {
416     // Overwrite for notranspose mode, L-vector to E-vector
417     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
418   }
419   if (t_mode == CEED_TRANSPOSE) {
420     // Restriction from E-vector to L-vector
421     // Performing v += r^T * u
422     // uu has shape [elem_size, num_comp, num_elem], row-major
423     // vv has shape [nnodes, num_comp]
424     // Sum into for transpose mode
425     switch (rstr_type) {
426       case CEED_RESTRICTION_STRIDED:
427         CeedCallBackend(
428             CeedElemRestrictionApplyStridedTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv));
429         break;
430       case CEED_RESTRICTION_STANDARD:
431         CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
432                                                                          v_offset, uu, vv));
433         break;
434       case CEED_RESTRICTION_ORIENTED:
435         if (use_signs) {
436           CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
437                                                                              elem_size, v_offset, uu, vv));
438         } else {
439           CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
440                                                                            v_offset, uu, vv));
441         }
442         break;
443       case CEED_RESTRICTION_CURL_ORIENTED:
444         if (use_signs && use_orients) {
445           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
446                                                                                  elem_size, v_offset, uu, vv));
447         } else if (use_orients) {
448           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop,
449                                                                                          num_elem, elem_size, v_offset, uu, vv));
450         } else {
451           CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
452                                                                            v_offset, uu, vv));
453         }
454         break;
455       case CEED_RESTRICTION_POINTS:
456         CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
457         break;
458     }
459   } else {
460     // Restriction from L-vector to E-vector
461     // Perform: v = r * u
462     // vv has shape [elem_size, num_comp, num_elem], row-major
463     // uu has shape [nnodes, num_comp]
464     // Overwrite for notranspose mode
465     switch (rstr_type) {
466       case CEED_RESTRICTION_STRIDED:
467         CeedCallBackend(
468             CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv));
469         break;
470       case CEED_RESTRICTION_STANDARD:
471         CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
472                                                                            v_offset, uu, vv));
473         break;
474       case CEED_RESTRICTION_ORIENTED:
475         if (use_signs) {
476           CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
477                                                                                elem_size, v_offset, uu, vv));
478         } else {
479           CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
480                                                                              elem_size, v_offset, uu, vv));
481         }
482         break;
483       case CEED_RESTRICTION_CURL_ORIENTED:
484         if (use_signs && use_orients) {
485           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
486                                                                                    elem_size, v_offset, uu, vv));
487         } else if (use_orients) {
488           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop,
489                                                                                            num_elem, elem_size, v_offset, uu, vv));
490         } else {
491           CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
492                                                                              elem_size, v_offset, uu, vv));
493         }
494         break;
495       case CEED_RESTRICTION_POINTS:
496         CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
497         break;
498     }
499   }
500   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
501   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
502   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
503   return CEED_ERROR_SUCCESS;
504 }
505 
506 //------------------------------------------------------------------------------
507 // ElemRestriction Apply - Common Sizes
508 //------------------------------------------------------------------------------
509 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
510                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
511                                             CeedVector v, CeedRequest *request) {
512   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
513 }
514 
515 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
516                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
517                                             CeedVector v, CeedRequest *request) {
518   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
519 }
520 
521 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
522                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
523                                             CeedVector v, CeedRequest *request) {
524   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
525 }
526 
527 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
528                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
529                                             CeedVector v, CeedRequest *request) {
530   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
531 }
532 
533 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
534                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
535                                             CeedVector v, CeedRequest *request) {
536   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
537 }
538 
539 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
540                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
541                                             CeedVector v, CeedRequest *request) {
542   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
543 }
544 
545 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
546                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
547                                             CeedVector v, CeedRequest *request) {
548   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
549 }
550 
551 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
552                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
553                                             CeedVector v, CeedRequest *request) {
554   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
555 }
556 
557 // LCOV_EXCL_START
558 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
559                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
560                                             CeedVector v, CeedRequest *request) {
561   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
562 }
563 // LCOV_EXCL_STOP
564 
565 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
566                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
567                                             CeedVector v, CeedRequest *request) {
568   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
569 }
570 
571 // LCOV_EXCL_START
572 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
573                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
574                                             CeedVector v, CeedRequest *request) {
575   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
576 }
577 // LCOV_EXCL_STOP
578 
579 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
580                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
581                                             CeedVector v, CeedRequest *request) {
582   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
583 }
584 
585 //------------------------------------------------------------------------------
586 // ElemRestriction Apply
587 //------------------------------------------------------------------------------
588 static int CeedElemRestrictionApply_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
589   CeedInt                  num_block, block_size, num_comp, comp_stride;
590   CeedElemRestriction_Ref *impl;
591 
592   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
593   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
594   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
595   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
596   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
597   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request));
598   return CEED_ERROR_SUCCESS;
599 }
600 
601 //------------------------------------------------------------------------------
602 // ElemRestriction Apply Unsigned
603 //------------------------------------------------------------------------------
604 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
605                                                 CeedRequest *request) {
606   CeedInt                  num_block, block_size, num_comp, comp_stride;
607   CeedElemRestriction_Ref *impl;
608 
609   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
610   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
611   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
612   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
613   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
614   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request));
615   return CEED_ERROR_SUCCESS;
616 }
617 
618 //------------------------------------------------------------------------------
619 // ElemRestriction Apply Unoriented
620 //------------------------------------------------------------------------------
621 static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
622                                                   CeedRequest *request) {
623   CeedInt                  num_block, block_size, num_comp, comp_stride;
624   CeedElemRestriction_Ref *impl;
625 
626   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
627   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
628   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
629   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
630   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
631   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request));
632   return CEED_ERROR_SUCCESS;
633 }
634 
635 //------------------------------------------------------------------------------
636 // ElemRestriction Apply Points
637 //------------------------------------------------------------------------------
638 static int CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
639                                                          CeedRequest *request) {
640   CeedInt                  num_comp;
641   CeedElemRestriction_Ref *impl;
642 
643   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
644   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
645   return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request);
646 }
647 
648 //------------------------------------------------------------------------------
649 // ElemRestriction Apply Block
650 //------------------------------------------------------------------------------
651 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
652                                              CeedRequest *request) {
653   CeedInt                  block_size, num_comp, comp_stride;
654   CeedElemRestriction_Ref *impl;
655 
656   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
657   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
658   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
659   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
660   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request));
661   return CEED_ERROR_SUCCESS;
662 }
663 
664 //------------------------------------------------------------------------------
665 // ElemRestriction Get Offsets
666 //------------------------------------------------------------------------------
667 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
668   CeedElemRestriction_Ref *impl;
669 
670   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
671 
672   CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
673 
674   *offsets = impl->offsets;
675   return CEED_ERROR_SUCCESS;
676 }
677 
678 //------------------------------------------------------------------------------
679 // ElemRestriction Get Orientations
680 //------------------------------------------------------------------------------
681 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
682   CeedElemRestriction_Ref *impl;
683 
684   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
685 
686   CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
687 
688   *orients = impl->orients;
689   return CEED_ERROR_SUCCESS;
690 }
691 
692 //------------------------------------------------------------------------------
693 // ElemRestriction Get Curl-Conforming Orientations
694 //------------------------------------------------------------------------------
695 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) {
696   CeedElemRestriction_Ref *impl;
697 
698   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
699 
700   CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
701 
702   *curl_orients = impl->curl_orients;
703   return CEED_ERROR_SUCCESS;
704 }
705 
706 //------------------------------------------------------------------------------
707 // ElemRestriction Destroy
708 //------------------------------------------------------------------------------
709 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr) {
710   CeedElemRestriction_Ref *impl;
711 
712   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
713   CeedCallBackend(CeedFree(&impl->offsets_allocated));
714   CeedCallBackend(CeedFree(&impl->orients_allocated));
715   CeedCallBackend(CeedFree(&impl->curl_orients_allocated));
716   CeedCallBackend(CeedFree(&impl));
717   return CEED_ERROR_SUCCESS;
718 }
719 
720 //------------------------------------------------------------------------------
721 // ElemRestriction Create
722 //------------------------------------------------------------------------------
723 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
724                                   const CeedInt8 *curl_orients, CeedElemRestriction rstr) {
725   Ceed                     ceed;
726   CeedInt                  num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets;
727   CeedRestrictionType      rstr_type;
728   CeedElemRestriction_Ref *impl;
729 
730   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
731   CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
732   CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
733   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
734   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
735   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
736   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
737   CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
738 
739   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
740 
741   CeedCallBackend(CeedCalloc(1, &impl));
742   CeedCallBackend(CeedElemRestrictionSetData(rstr, impl));
743 
744   // Set layouts
745   {
746     bool    has_backend_strides;
747     CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
748 
749     CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout));
750     if (rstr_type == CEED_RESTRICTION_STRIDED) {
751       CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
752       if (has_backend_strides) {
753         CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout));
754       }
755     }
756   }
757 
758   // Offsets data
759   if (rstr_type != CEED_RESTRICTION_STRIDED) {
760     const char *resource;
761 
762     // Check indices for ref or memcheck backends
763     {
764       Ceed current = ceed, parent = NULL;
765 
766       CeedCallBackend(CeedGetParent(current, &parent));
767       while (current != parent) {
768         current = parent;
769         CeedCallBackend(CeedGetParent(current, &parent));
770       }
771       CeedCallBackend(CeedGetResource(parent, &resource));
772     }
773     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") ||
774         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
775       CeedSize l_size;
776 
777       CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
778       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
779         CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
780                   "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
781       }
782     }
783 
784     // Copy data
785     if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points));
786     num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size);
787     switch (copy_mode) {
788       case CEED_COPY_VALUES:
789         CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated));
790         memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0]));
791         impl->offsets = impl->offsets_allocated;
792         break;
793       case CEED_OWN_POINTER:
794         impl->offsets_allocated = (CeedInt *)offsets;
795         impl->offsets           = impl->offsets_allocated;
796         break;
797       case CEED_USE_POINTER:
798         impl->offsets = offsets;
799     }
800 
801     // Orientation data
802     if (rstr_type == CEED_RESTRICTION_ORIENTED) {
803       CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction");
804       switch (copy_mode) {
805         case CEED_COPY_VALUES:
806           CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated));
807           memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0]));
808           impl->orients = impl->orients_allocated;
809           break;
810         case CEED_OWN_POINTER:
811           impl->orients_allocated = (bool *)orients;
812           impl->orients           = impl->orients_allocated;
813           break;
814         case CEED_USE_POINTER:
815           impl->orients = orients;
816       }
817     } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
818       CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction");
819       switch (copy_mode) {
820         case CEED_COPY_VALUES:
821           CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated));
822           memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0]));
823           impl->curl_orients = impl->curl_orients_allocated;
824           break;
825         case CEED_OWN_POINTER:
826           impl->curl_orients_allocated = (CeedInt8 *)curl_orients;
827           impl->curl_orients           = impl->curl_orients_allocated;
828           break;
829         case CEED_USE_POINTER:
830           impl->curl_orients = curl_orients;
831       }
832     }
833   }
834 
835   // Set apply function based upon num_comp, block_size, and comp_stride
836   CeedInt index = -1;
837 
838   if (block_size < 10) index = 100 * num_comp + 10 * block_size + (comp_stride == 1);
839   switch (index) {
840     case 110:
841       impl->Apply = CeedElemRestrictionApply_Ref_110;
842       break;
843     case 111:
844       impl->Apply = CeedElemRestrictionApply_Ref_111;
845       break;
846     case 180:
847       impl->Apply = CeedElemRestrictionApply_Ref_180;
848       break;
849     case 181:
850       impl->Apply = CeedElemRestrictionApply_Ref_181;
851       break;
852     case 310:
853       impl->Apply = CeedElemRestrictionApply_Ref_310;
854       break;
855     case 311:
856       impl->Apply = CeedElemRestrictionApply_Ref_311;
857       break;
858     case 380:
859       impl->Apply = CeedElemRestrictionApply_Ref_380;
860       break;
861     case 381:
862       impl->Apply = CeedElemRestrictionApply_Ref_381;
863       break;
864     // LCOV_EXCL_START
865     case 510:
866       impl->Apply = CeedElemRestrictionApply_Ref_510;
867       break;
868     // LCOV_EXCL_STOP
869     case 511:
870       impl->Apply = CeedElemRestrictionApply_Ref_511;
871       break;
872     // LCOV_EXCL_START
873     case 580:
874       impl->Apply = CeedElemRestrictionApply_Ref_580;
875       break;
876     // LCOV_EXCL_STOP
877     case 581:
878       impl->Apply = CeedElemRestrictionApply_Ref_581;
879       break;
880     default:
881       impl->Apply = CeedElemRestrictionApply_Ref_Core;
882       break;
883   }
884 
885   // Register backend functions
886   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Ref));
887   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref));
888   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref));
889   if (rstr_type == CEED_RESTRICTION_POINTS) {
890     CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Ref));
891   }
892   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
893   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
894   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Ref));
895   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref));
896   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Ref));
897   return CEED_ERROR_SUCCESS;
898 }
899 
900 //------------------------------------------------------------------------------
901