xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision cf415216851e455690c5797385b9bba87c798dc1)
1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <stdbool.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include "ceed-ref.h"
15 
16 //------------------------------------------------------------------------------
17 // Core ElemRestriction Apply Code
18 //------------------------------------------------------------------------------
19 static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
20                                                                       CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
21                                                                       CeedSize v_offset, const CeedScalar *__restrict__ uu,
22                                                                       CeedScalar *__restrict__ vv) {
23   // No offsets provided, identity restriction
24   bool has_backend_strides;
25 
26   CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
27   if (has_backend_strides) {
28     // CPU backend strides are {1, elem_size, elem_size*num_comp}
29     // This if branch is left separate to allow better inlining
30     for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
31       CeedPragmaSIMD for (CeedSize k = 0; k < num_comp; k++) {
32         CeedPragmaSIMD for (CeedSize n = 0; n < elem_size; n++) {
33           CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
34             vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
35                 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * (CeedSize)num_comp];
36           }
37         }
38       }
39     }
40   } else {
41     // User provided strides
42     CeedInt strides[3];
43 
44     CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides));
45     for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
46       CeedPragmaSIMD for (CeedSize k = 0; k < num_comp; k++) {
47         CeedPragmaSIMD for (CeedSize n = 0; n < elem_size; n++) {
48           CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
49             vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
50                 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * (CeedSize)strides[2]];
51           }
52         }
53       }
54     }
55   }
56   return CEED_ERROR_SUCCESS;
57 }
58 
59 static inline int CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
60                                                                      const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
61                                                                      CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
62                                                                      CeedScalar *__restrict__ vv) {
63   // Default restriction with offsets
64   CeedElemRestriction_Ref *impl;
65 
66   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
67   for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
68     CeedPragmaSIMD for (CeedSize k = 0; k < num_comp; k++) {
69       CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) {
70         vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
71       }
72     }
73   }
74   return CEED_ERROR_SUCCESS;
75 }
76 
77 static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
78                                                                        const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
79                                                                        CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
80                                                                        CeedScalar *__restrict__ vv) {
81   // Restriction with orientations
82   CeedElemRestriction_Ref *impl;
83 
84   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
85   for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
86     CeedPragmaSIMD for (CeedSize k = 0; k < num_comp; k++) {
87       CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) {
88         vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] =
89             uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0);
90       }
91     }
92   }
93   return CEED_ERROR_SUCCESS;
94 }
95 
96 static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
97                                                                            const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
98                                                                            CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
99                                                                            CeedScalar *__restrict__ vv) {
100   // Restriction with tridiagonal transformation
101   CeedElemRestriction_Ref *impl;
102 
103   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
104   for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
105     CeedPragmaSIMD for (CeedSize k = 0; k < num_comp; k++) {
106       CeedSize n = 0;
107 
108       CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
109         vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
110             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
111                 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
112             uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
113                 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
114       }
115       CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) {
116         CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
117           vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
118               uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
119                   impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
120               uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
121                   impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
122               uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
123                   impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
124         }
125       }
126       CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
127         vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
128             uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
129                 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
130             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
131                 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
132       }
133     }
134   }
135   return CEED_ERROR_SUCCESS;
136 }
137 
138 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp,
139                                                                                    const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
140                                                                                    CeedInt stop, CeedInt num_elem, CeedInt elem_size,
141                                                                                    CeedSize v_offset, const CeedScalar *__restrict__ uu,
142                                                                                    CeedScalar *__restrict__ vv) {
143   // Restriction with (unsigned) tridiagonal transformation
144   CeedElemRestriction_Ref *impl;
145 
146   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
147   for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
148     CeedPragmaSIMD for (CeedSize k = 0; k < num_comp; k++) {
149       CeedSize n = 0;
150 
151       CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
152         vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
153             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
154                 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
155             uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
156                 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
157       }
158       CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) {
159         CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
160           vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
161               uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
162                   abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
163               uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
164                   abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
165               uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
166                   abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
167         }
168       }
169       CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) {
170         vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] =
171             uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
172                 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
173             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
174                 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
175       }
176     }
177   }
178   return CEED_ERROR_SUCCESS;
179 }
180 
181 static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
182                                                                     CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
183                                                                     CeedSize v_offset, const CeedScalar *__restrict__ uu,
184                                                                     CeedScalar *__restrict__ vv) {
185   // No offsets provided, identity restriction
186   bool has_backend_strides;
187 
188   CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
189   if (has_backend_strides) {
190     // CPU backend strides are {1, elem_size, elem_size*num_comp}
191     // This if brach is left separate to allow better inlining
192     for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
193       CeedPragmaSIMD for (CeedSize k = 0; k < num_comp; k++) {
194         CeedPragmaSIMD for (CeedSize n = 0; n < elem_size; n++) {
195           CeedPragmaSIMD for (CeedSize j = 0; j < CeedIntMin(block_size, num_elem - e); j++) {
196             vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset];
197           }
198         }
199       }
200     }
201   } else {
202     // User provided strides
203     CeedInt strides[3];
204 
205     CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides));
206     for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
207       CeedPragmaSIMD for (CeedSize k = 0; k < num_comp; k++) {
208         CeedPragmaSIMD for (CeedSize n = 0; n < elem_size; n++) {
209           CeedPragmaSIMD for (CeedSize j = 0; j < CeedIntMin(block_size, num_elem - e); j++) {
210             vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
211                 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset];
212           }
213         }
214       }
215     }
216   }
217   return CEED_ERROR_SUCCESS;
218 }
219 
220 static inline int CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
221                                                                    const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
222                                                                    CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
223                                                                    CeedScalar *__restrict__ vv) {
224   // Default restriction with offsets
225   CeedElemRestriction_Ref *impl;
226 
227   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
228   for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
229     for (CeedSize k = 0; k < num_comp; k++) {
230       for (CeedSize i = 0; i < elem_size * block_size; i += block_size) {
231         // Iteration bound set to discard padding elements
232         for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
233           CeedScalar vv_loc;
234 
235           vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset];
236           CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
237         }
238       }
239     }
240   }
241   return CEED_ERROR_SUCCESS;
242 }
243 
244 static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
245                                                                      const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
246                                                                      CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
247                                                                      CeedScalar *__restrict__ vv) {
248   // Restriction with orientations
249   CeedElemRestriction_Ref *impl;
250 
251   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
252   for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
253     for (CeedSize k = 0; k < num_comp; k++) {
254       for (CeedSize i = 0; i < elem_size * block_size; i += block_size) {
255         // Iteration bound set to discard padding elements
256         for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
257           CeedScalar vv_loc;
258 
259           vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0);
260           CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
261         }
262       }
263     }
264   }
265   return CEED_ERROR_SUCCESS;
266 }
267 
268 static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
269                                                                          const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
270                                                                          CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
271                                                                          CeedScalar *__restrict__ vv) {
272   // Restriction with tridiagonal transformation
273   CeedElemRestriction_Ref *impl;
274   CeedScalar               vv_loc[block_size];
275 
276   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
277   for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
278     for (CeedSize k = 0; k < num_comp; k++) {
279       // Iteration bound set to discard padding elements
280       const CeedSize block_end = CeedIntMin(block_size, num_elem - e);
281       CeedSize       n         = 0;
282 
283       CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
284         vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
285                         impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
286                     uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
287                         impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
288       }
289       for (CeedSize j = 0; j < block_end; j++) {
290         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
291       }
292       for (n = 1; n < elem_size - 1; n++) {
293         CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
294           vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
295                           impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
296                       uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
297                           impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
298                       uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
299                           impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
300         }
301         for (CeedSize j = 0; j < block_end; j++) {
302           CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
303         }
304       }
305       CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
306         vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
307                         impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
308                     uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
309                         impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
310       }
311       for (CeedSize j = 0; j < block_end; j++) {
312         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
313       }
314     }
315   }
316   return CEED_ERROR_SUCCESS;
317 }
318 
319 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp,
320                                                                                  const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
321                                                                                  CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset,
322                                                                                  const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
323   // Restriction with (unsigned) tridiagonal transformation
324   CeedElemRestriction_Ref *impl;
325   CeedScalar               vv_loc[block_size];
326 
327   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
328   for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
329     for (CeedSize k = 0; k < num_comp; k++) {
330       // Iteration bound set to discard padding elements
331       const CeedSize block_end = CeedIntMin(block_size, num_elem - e);
332       CeedSize       n         = 0;
333 
334       CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
335         vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
336                         abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
337                     uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
338                         abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
339       }
340       for (CeedSize j = 0; j < block_end; j++) {
341         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
342       }
343       for (n = 1; n < elem_size - 1; n++) {
344         CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
345           vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
346                           abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
347                       uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
348                           abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
349                       uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] *
350                           abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
351         }
352         for (CeedSize j = 0; j < block_end; j++) {
353           CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
354         }
355       }
356       CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
357         vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] *
358                         abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
359                     uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] *
360                         abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
361       }
362       for (CeedSize j = 0; j < block_end; j++) {
363         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
364       }
365     }
366   }
367   return CEED_ERROR_SUCCESS;
368 }
369 
370 static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, CeedInt stop,
371                                                                      CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu,
372                                                                      CeedScalar *__restrict__ vv) {
373   CeedInt                  num_points, l_vec_offset;
374   CeedSize                 e_vec_offset = 0;
375   CeedElemRestriction_Ref *impl;
376 
377   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
378   for (CeedInt e = start; e < stop; e++) {
379     l_vec_offset = impl->offsets[e];
380     CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points));
381     if (t_mode == CEED_NOTRANSPOSE) {
382       for (CeedSize i = 0; i < num_points; i++) {
383         for (CeedSize j = 0; j < num_comp; j++) vv[j * num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j];
384       }
385     } else {
386       for (CeedSize i = 0; i < num_points; i++) {
387         for (CeedSize j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] += uu[j * num_points + i + e_vec_offset];
388       }
389     }
390     e_vec_offset += num_points * (CeedSize)num_comp;
391   }
392   return CEED_ERROR_SUCCESS;
393 }
394 
395 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
396                                                     const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs,
397                                                     bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) {
398   CeedInt             num_elem, elem_size;
399   CeedSize            v_offset = 0;
400   CeedRestrictionType rstr_type;
401   const CeedScalar   *uu;
402   CeedScalar         *vv;
403 
404   CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
405   CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
406   v_offset = start * block_size * elem_size * (CeedSize)num_comp;
407   CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
408   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
409 
410   if (t_mode == CEED_TRANSPOSE) {
411     // Sum into for transpose mode, E-vector to L-vector
412     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
413   } else {
414     // Overwrite for notranspose mode, L-vector to E-vector
415     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
416   }
417   if (t_mode == CEED_TRANSPOSE) {
418     // Restriction from E-vector to L-vector
419     // Performing v += r^T * u
420     // uu has shape [elem_size, num_comp, num_elem], row-major
421     // vv has shape [nnodes, num_comp]
422     // Sum into for transpose mode
423     switch (rstr_type) {
424       case CEED_RESTRICTION_STRIDED:
425         CeedCallBackend(
426             CeedElemRestrictionApplyStridedTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv));
427         break;
428       case CEED_RESTRICTION_STANDARD:
429         CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
430                                                                          v_offset, uu, vv));
431         break;
432       case CEED_RESTRICTION_ORIENTED:
433         if (use_signs) {
434           CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
435                                                                              elem_size, v_offset, uu, vv));
436         } else {
437           CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
438                                                                            v_offset, uu, vv));
439         }
440         break;
441       case CEED_RESTRICTION_CURL_ORIENTED:
442         if (use_signs && use_orients) {
443           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
444                                                                                  elem_size, v_offset, uu, vv));
445         } else if (use_orients) {
446           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop,
447                                                                                          num_elem, elem_size, v_offset, uu, vv));
448         } else {
449           CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
450                                                                            v_offset, uu, vv));
451         }
452         break;
453       case CEED_RESTRICTION_POINTS:
454         CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
455         break;
456     }
457   } else {
458     // Restriction from L-vector to E-vector
459     // Perform: v = r * u
460     // vv has shape [elem_size, num_comp, num_elem], row-major
461     // uu has shape [nnodes, num_comp]
462     // Overwrite for notranspose mode
463     switch (rstr_type) {
464       case CEED_RESTRICTION_STRIDED:
465         CeedCallBackend(
466             CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv));
467         break;
468       case CEED_RESTRICTION_STANDARD:
469         CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
470                                                                            v_offset, uu, vv));
471         break;
472       case CEED_RESTRICTION_ORIENTED:
473         if (use_signs) {
474           CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
475                                                                                elem_size, v_offset, uu, vv));
476         } else {
477           CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
478                                                                              elem_size, v_offset, uu, vv));
479         }
480         break;
481       case CEED_RESTRICTION_CURL_ORIENTED:
482         if (use_signs && use_orients) {
483           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
484                                                                                    elem_size, v_offset, uu, vv));
485         } else if (use_orients) {
486           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop,
487                                                                                            num_elem, elem_size, v_offset, uu, vv));
488         } else {
489           CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
490                                                                              elem_size, v_offset, uu, vv));
491         }
492         break;
493       case CEED_RESTRICTION_POINTS:
494         CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
495         break;
496     }
497   }
498   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
499   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
500   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
501   return CEED_ERROR_SUCCESS;
502 }
503 
504 //------------------------------------------------------------------------------
505 // ElemRestriction Apply - Common Sizes
506 //------------------------------------------------------------------------------
507 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
508                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
509                                             CeedVector v, CeedRequest *request) {
510   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
511 }
512 
513 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
514                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
515                                             CeedVector v, CeedRequest *request) {
516   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
517 }
518 
519 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
520                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
521                                             CeedVector v, CeedRequest *request) {
522   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
523 }
524 
525 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
526                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
527                                             CeedVector v, CeedRequest *request) {
528   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
529 }
530 
531 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
532                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
533                                             CeedVector v, CeedRequest *request) {
534   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
535 }
536 
537 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
538                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
539                                             CeedVector v, CeedRequest *request) {
540   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
541 }
542 
543 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
544                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
545                                             CeedVector v, CeedRequest *request) {
546   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
547 }
548 
549 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
550                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
551                                             CeedVector v, CeedRequest *request) {
552   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
553 }
554 
555 // LCOV_EXCL_START
556 static int CeedElemRestrictionApply_Ref_410(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
557                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
558                                             CeedVector v, CeedRequest *request) {
559   return CeedElemRestrictionApply_Ref_Core(rstr, 4, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
560 }
561 
562 static int CeedElemRestrictionApply_Ref_411(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
563                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
564                                             CeedVector v, CeedRequest *request) {
565   return CeedElemRestrictionApply_Ref_Core(rstr, 4, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
566 }
567 
568 static int CeedElemRestrictionApply_Ref_480(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
569                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
570                                             CeedVector v, CeedRequest *request) {
571   return CeedElemRestrictionApply_Ref_Core(rstr, 4, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
572 }
573 
574 static int CeedElemRestrictionApply_Ref_481(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
575                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
576                                             CeedVector v, CeedRequest *request) {
577   return CeedElemRestrictionApply_Ref_Core(rstr, 4, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
578 }
579 
580 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
581                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
582                                             CeedVector v, CeedRequest *request) {
583   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
584 }
585 // LCOV_EXCL_STOP
586 
587 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
588                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
589                                             CeedVector v, CeedRequest *request) {
590   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
591 }
592 
593 // LCOV_EXCL_START
594 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
595                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
596                                             CeedVector v, CeedRequest *request) {
597   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
598 }
599 // LCOV_EXCL_STOP
600 
601 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
602                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
603                                             CeedVector v, CeedRequest *request) {
604   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
605 }
606 
607 //------------------------------------------------------------------------------
608 // ElemRestriction Apply
609 //------------------------------------------------------------------------------
610 static int CeedElemRestrictionApply_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
611   CeedInt                  num_block, block_size, num_comp, comp_stride;
612   CeedElemRestriction_Ref *impl;
613 
614   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
615   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
616   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
617   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
618   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
619   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request));
620   return CEED_ERROR_SUCCESS;
621 }
622 
623 //------------------------------------------------------------------------------
624 // ElemRestriction Apply Unsigned
625 //------------------------------------------------------------------------------
626 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
627                                                 CeedRequest *request) {
628   CeedInt                  num_block, block_size, num_comp, comp_stride;
629   CeedElemRestriction_Ref *impl;
630 
631   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
632   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
633   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
634   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
635   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
636   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request));
637   return CEED_ERROR_SUCCESS;
638 }
639 
640 //------------------------------------------------------------------------------
641 // ElemRestriction Apply Unoriented
642 //------------------------------------------------------------------------------
643 static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
644                                                   CeedRequest *request) {
645   CeedInt                  num_block, block_size, num_comp, comp_stride;
646   CeedElemRestriction_Ref *impl;
647 
648   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
649   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
650   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
651   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
652   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
653   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request));
654   return CEED_ERROR_SUCCESS;
655 }
656 
657 //------------------------------------------------------------------------------
658 // ElemRestriction Apply Points
659 //------------------------------------------------------------------------------
660 static int CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
661                                                          CeedRequest *request) {
662   CeedInt                  num_comp;
663   CeedElemRestriction_Ref *impl;
664 
665   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
666   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
667   return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request);
668 }
669 
670 //------------------------------------------------------------------------------
671 // ElemRestriction Apply Block
672 //------------------------------------------------------------------------------
673 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
674                                              CeedRequest *request) {
675   CeedInt                  block_size, num_comp, comp_stride;
676   CeedElemRestriction_Ref *impl;
677 
678   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
679   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
680   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
681   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
682   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request));
683   return CEED_ERROR_SUCCESS;
684 }
685 
686 //------------------------------------------------------------------------------
687 // ElemRestriction Get Offsets
688 //------------------------------------------------------------------------------
689 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
690   CeedElemRestriction_Ref *impl;
691 
692   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
693 
694   CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
695 
696   *offsets = impl->offsets;
697   return CEED_ERROR_SUCCESS;
698 }
699 
700 //------------------------------------------------------------------------------
701 // ElemRestriction Get Orientations
702 //------------------------------------------------------------------------------
703 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
704   CeedElemRestriction_Ref *impl;
705 
706   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
707 
708   CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
709 
710   *orients = impl->orients;
711   return CEED_ERROR_SUCCESS;
712 }
713 
714 //------------------------------------------------------------------------------
715 // ElemRestriction Get Curl-Conforming Orientations
716 //------------------------------------------------------------------------------
717 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) {
718   CeedElemRestriction_Ref *impl;
719 
720   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
721 
722   CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
723 
724   *curl_orients = impl->curl_orients;
725   return CEED_ERROR_SUCCESS;
726 }
727 
728 //------------------------------------------------------------------------------
729 // ElemRestriction Destroy
730 //------------------------------------------------------------------------------
731 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr) {
732   CeedElemRestriction_Ref *impl;
733 
734   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
735   CeedCallBackend(CeedFree(&impl->offsets_owned));
736   CeedCallBackend(CeedFree(&impl->orients_owned));
737   CeedCallBackend(CeedFree(&impl->curl_orients_owned));
738   CeedCallBackend(CeedFree(&impl));
739   return CEED_ERROR_SUCCESS;
740 }
741 
742 //------------------------------------------------------------------------------
743 // ElemRestriction Create
744 //------------------------------------------------------------------------------
745 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
746                                   const CeedInt8 *curl_orients, CeedElemRestriction rstr) {
747   Ceed                     ceed;
748   CeedInt                  num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets;
749   CeedRestrictionType      rstr_type;
750   CeedElemRestriction_Ref *impl;
751 
752   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
753   CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
754   CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
755   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
756   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
757   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
758   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
759   CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
760 
761   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
762 
763   CeedCallBackend(CeedCalloc(1, &impl));
764   CeedCallBackend(CeedElemRestrictionSetData(rstr, impl));
765 
766   // Set layouts
767   {
768     bool    has_backend_strides;
769     CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
770 
771     CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout));
772     if (rstr_type == CEED_RESTRICTION_STRIDED) {
773       CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
774       if (has_backend_strides) {
775         CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout));
776       }
777     }
778   }
779 
780   // Expand E-vector size for AtPoints
781   if (rstr_type == CEED_RESTRICTION_POINTS) {
782     CeedSize max_points = 0, num_points_total = 0;
783 
784     for (CeedInt i = 0; i < num_elem; i++) {
785       CeedInt num_points = offsets[i + 1] - offsets[i];
786 
787       max_points = CeedIntMax(max_points, num_points);
788       num_points_total += num_points;
789     }
790     // -- Increase size for last element
791     num_points_total += (max_points - (offsets[num_elem] - offsets[num_elem - 1]));
792     CeedCallBackend(CeedElemRestrictionSetAtPointsEVectorSize(rstr, num_points_total * num_comp));
793   }
794 
795   // Offsets data
796   if (rstr_type != CEED_RESTRICTION_STRIDED) {
797     const char *resource;
798 
799     // Check indices for ref or memcheck backends
800     {
801       Ceed current = ceed, ceed_parent = NULL;
802 
803       CeedCallBackend(CeedGetParent(current, &ceed_parent));
804       CeedCallBackend(CeedGetResource(ceed_parent, &resource));
805       CeedCallBackend(CeedDestroy(&ceed_parent));
806     }
807     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked")) {
808       CeedSize l_size;
809 
810       CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
811       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
812         CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
813                   "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
814       }
815     }
816 
817     // Copy data
818     if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points));
819     num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size);
820     CeedCallBackend(CeedSetHostCeedIntArray(offsets, copy_mode, num_offsets, &impl->offsets_owned, &impl->offsets_borrowed, &impl->offsets));
821 
822     // Orientation data
823     if (rstr_type == CEED_RESTRICTION_ORIENTED) {
824       CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction");
825       CeedCallBackend(CeedSetHostBoolArray(orients, copy_mode, num_offsets, &impl->orients_owned, &impl->orients_borrowed, &impl->orients));
826     } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
827       CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction");
828       CeedCallBackend(CeedSetHostCeedInt8Array(curl_orients, copy_mode, 3 * num_offsets, &impl->curl_orients_owned, &impl->curl_orients_borrowed,
829                                                &impl->curl_orients));
830     }
831   }
832 
833   // Set apply function based upon num_comp, block_size, and comp_stride
834   CeedInt index = -1;
835 
836   if (block_size < 10) index = 100 * num_comp + 10 * block_size + (comp_stride == 1);
837   switch (index) {
838     case 110:
839       impl->Apply = CeedElemRestrictionApply_Ref_110;
840       break;
841     case 111:
842       impl->Apply = CeedElemRestrictionApply_Ref_111;
843       break;
844     case 180:
845       impl->Apply = CeedElemRestrictionApply_Ref_180;
846       break;
847     case 181:
848       impl->Apply = CeedElemRestrictionApply_Ref_181;
849       break;
850     case 310:
851       impl->Apply = CeedElemRestrictionApply_Ref_310;
852       break;
853     case 311:
854       impl->Apply = CeedElemRestrictionApply_Ref_311;
855       break;
856     case 380:
857       impl->Apply = CeedElemRestrictionApply_Ref_380;
858       break;
859     case 381:
860       impl->Apply = CeedElemRestrictionApply_Ref_381;
861       break;
862     // LCOV_EXCL_START
863     case 410:
864       impl->Apply = CeedElemRestrictionApply_Ref_410;
865       break;
866     case 411:
867       impl->Apply = CeedElemRestrictionApply_Ref_411;
868       break;
869     case 480:
870       impl->Apply = CeedElemRestrictionApply_Ref_480;
871       break;
872     case 481:
873       impl->Apply = CeedElemRestrictionApply_Ref_481;
874       break;
875     case 510:
876       impl->Apply = CeedElemRestrictionApply_Ref_510;
877       break;
878     // LCOV_EXCL_STOP
879     case 511:
880       impl->Apply = CeedElemRestrictionApply_Ref_511;
881       break;
882     // LCOV_EXCL_START
883     case 580:
884       impl->Apply = CeedElemRestrictionApply_Ref_580;
885       break;
886     // LCOV_EXCL_STOP
887     case 581:
888       impl->Apply = CeedElemRestrictionApply_Ref_581;
889       break;
890     default:
891       impl->Apply = CeedElemRestrictionApply_Ref_Core;
892       break;
893   }
894 
895   // Register backend functions
896   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Ref));
897   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref));
898   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref));
899   if (rstr_type == CEED_RESTRICTION_POINTS) {
900     CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Ref));
901   }
902   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
903   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
904   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Ref));
905   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref));
906   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Ref));
907   CeedCallBackend(CeedDestroy(&ceed));
908   return CEED_ERROR_SUCCESS;
909 }
910 
911 //------------------------------------------------------------------------------
912