xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision 7c1dbaff56f8afaddfc574209b09713e25514e81)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <stdbool.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include "ceed-ref.h"
15 
16 //------------------------------------------------------------------------------
17 // Core ElemRestriction Apply Code
18 //------------------------------------------------------------------------------
19 static inline int CeedElemRestrictionApplyNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
20                                                                const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, CeedVector v,
21                                                                CeedRequest *request) {
22   CeedElemRestriction_Ref *impl;
23   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
24   const CeedScalar *uu;
25   CeedScalar       *vv;
26   CeedInt           num_elem, elem_size, v_offset;
27   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
28   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
29   v_offset = start * blk_size * elem_size * num_comp;
30   CeedRestrictionType rstr_type;
31   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
32 
33   // Restriction from L-vector to E-vector
34   // Perform: v = r * u
35   // vv has shape [elem_size, num_comp, num_elem], row-major
36   // uu has shape [nnodes, num_comp]
37   // Overwrite for notranspose mode
38   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
39   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
40   switch (rstr_type) {
41     case CEED_RESTRICTION_STRIDED: {
42       // No offsets provided, Identity Restriction
43       bool has_backend_strides;
44       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
45       if (has_backend_strides) {
46         // CPU backend strides are {1, elem_size, elem_size*num_comp}
47         // This if branch is left separate to allow better inlining
48         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
49           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
50             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
51               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
52                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
53                     uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp];
54               }
55             }
56           }
57         }
58       } else {
59         // User provided strides
60         CeedInt strides[3];
61         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
62         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
63           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
64             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
65               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
66                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
67                     uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]];
68               }
69             }
70           }
71         }
72       }
73     } break;
74     case CEED_RESTRICTION_DEFAULT: {
75       // Default restriction with offsets
76       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
77         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
78           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
79             vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
80           }
81         }
82       }
83     } break;
84     case CEED_RESTRICTION_ORIENTED: {
85       // Restriction with orientations
86       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
87         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
88           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
89             vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] =
90                 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0);
91           }
92         }
93       }
94     } break;
95     case CEED_RESTRICTION_CURL_ORIENTED: {
96       // Restriction with tridiagonal transformation
97       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
98         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
99           CeedInt n = 0;
100           CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
101             vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
102                 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
103                     impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] +
104                 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] *
105                     impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size];
106           }
107           for (n = 1; n < elem_size - 1; n++) {
108             CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
109               vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
110                   uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] *
111                       impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] +
112                   uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
113                       impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] +
114                   uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] *
115                       impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size];
116             }
117           }
118           CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
119             vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
120                 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] *
121                     impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] +
122                 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
123                     impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size];
124           }
125         }
126       }
127     } break;
128   }
129   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
130   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
131   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
132   return CEED_ERROR_SUCCESS;
133 }
134 
135 //------------------------------------------------------------------------------
136 // Core Unsigned ElemRestriction Apply Code
137 //------------------------------------------------------------------------------
138 static inline int CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
139                                                                        const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u,
140                                                                        CeedVector v, CeedRequest *request) {
141   Ceed ceed;
142   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
143   CeedElemRestriction_Ref *impl;
144   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
145   const CeedScalar *uu;
146   CeedScalar       *vv;
147   CeedInt           num_elem, elem_size, v_offset;
148   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
149   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
150   v_offset = start * blk_size * elem_size * num_comp;
151   CeedRestrictionType rstr_type;
152   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
153 
154   // Restriction from L-vector to E-vector
155   // Perform: v = r * u
156   // vv has shape [elem_size, num_comp, num_elem], row-major
157   // uu has shape [nnodes, num_comp]
158   // Overwrite for notranspose mode
159   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
160   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
161   switch (rstr_type) {
162     case CEED_RESTRICTION_ORIENTED: {
163       // Restriction with orientations
164       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
165         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
166           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
167             vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
168           }
169         }
170       }
171     } break;
172     case CEED_RESTRICTION_CURL_ORIENTED: {
173       // Restriction with tridiagonal transformation
174       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
175         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
176           CeedInt n = 0;
177           CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
178             vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
179                 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
180                     abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) +
181                 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] *
182                     abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]);
183           }
184           for (n = 1; n < elem_size - 1; n++) {
185             CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
186               vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
187                   uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] *
188                       abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) +
189                   uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
190                       abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) +
191                   uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] *
192                       abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]);
193             }
194           }
195           CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
196             vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
197                 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] *
198                     abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) +
199                 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
200                     abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]);
201           }
202         }
203       }
204     } break;
205     // LCOV_EXCL_START
206     case CEED_RESTRICTION_STRIDED:
207       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported");
208     case CEED_RESTRICTION_DEFAULT:
209       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported");
210       // LCOV_EXCL_STOP
211   }
212   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
213   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
214   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
215   return CEED_ERROR_SUCCESS;
216 }
217 
218 //------------------------------------------------------------------------------
219 // Core Unoriented ElemRestriction Apply Code
220 //------------------------------------------------------------------------------
221 static inline int CeedElemRestrictionApplyUnorientedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
222                                                                          const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u,
223                                                                          CeedVector v, CeedRequest *request) {
224   Ceed ceed;
225   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
226   CeedElemRestriction_Ref *impl;
227   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
228   const CeedScalar *uu;
229   CeedScalar       *vv;
230   CeedInt           num_elem, elem_size, v_offset;
231   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
232   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
233   v_offset = start * blk_size * elem_size * num_comp;
234   CeedRestrictionType rstr_type;
235   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
236 
237   // Restriction from L-vector to E-vector
238   // Perform: v = r * u
239   // vv has shape [elem_size, num_comp, num_elem], row-major
240   // uu has shape [nnodes, num_comp]
241   // Overwrite for notranspose mode
242   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
243   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
244   switch (rstr_type) {
245     case CEED_RESTRICTION_CURL_ORIENTED: {
246       // Restriction with tridiagonal transformation
247       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
248         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
249           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
250             vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
251           }
252         }
253       }
254     } break;
255     // LCOV_EXCL_START
256     case CEED_RESTRICTION_STRIDED:
257       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported");
258     case CEED_RESTRICTION_DEFAULT:
259       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported");
260     case CEED_RESTRICTION_ORIENTED:
261       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_ORIENTED not supported");
262       // LCOV_EXCL_STOP
263   }
264   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
265   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
266   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
267   return CEED_ERROR_SUCCESS;
268 }
269 
270 //------------------------------------------------------------------------------
271 // Core ElemRestriction Apply Transpose Code
272 //------------------------------------------------------------------------------
273 static inline int CeedElemRestrictionApplyTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
274                                                              const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, CeedVector v,
275                                                              CeedRequest *request) {
276   CeedElemRestriction_Ref *impl;
277   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
278   const CeedScalar *uu;
279   CeedScalar       *vv;
280   CeedInt           num_elem, elem_size, v_offset;
281   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
282   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
283   v_offset = start * blk_size * elem_size * num_comp;
284   CeedRestrictionType rstr_type;
285   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
286 
287   // Restriction from E-vector to L-vector
288   // Performing v += r^T * u
289   // uu has shape [elem_size, num_comp, num_elem], row-major
290   // vv has shape [nnodes, num_comp]
291   // Sum into for transpose mode
292   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
293   CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
294   switch (rstr_type) {
295     case CEED_RESTRICTION_STRIDED: {
296       // No offsets provided, Identity Restriction
297       bool has_backend_strides;
298       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
299       if (has_backend_strides) {
300         // CPU backend strides are {1, elem_size, elem_size*num_comp}
301         // This if brach is left separate to allow better inlining
302         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
303           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
304             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
305               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
306                 vv[n + k * elem_size + (e + j) * elem_size * num_comp] +=
307                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
308               }
309             }
310           }
311         }
312       } else {
313         // User provided strides
314         CeedInt strides[3];
315         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
316         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
317           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
318             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
319               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
320                 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
321                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
322               }
323             }
324           }
325         }
326       }
327     } break;
328     case CEED_RESTRICTION_DEFAULT: {
329       // Default restriction with offsets
330       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
331         for (CeedInt k = 0; k < num_comp; k++) {
332           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
333             // Iteration bound set to discard padding elements
334             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
335               vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset];
336             }
337           }
338         }
339       }
340     } break;
341     case CEED_RESTRICTION_ORIENTED: {
342       // Restriction with orientations
343       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
344         for (CeedInt k = 0; k < num_comp; k++) {
345           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
346             // Iteration bound set to discard padding elements
347             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
348               vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
349                   uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0);
350             }
351           }
352         }
353       }
354     } break;
355     case CEED_RESTRICTION_CURL_ORIENTED: {
356       // Restriction with tridiagonal transformation
357       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
358         for (CeedInt k = 0; k < num_comp; k++) {
359           // Iteration bound set to discard padding elements
360           CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0;
361           for (CeedInt j = 0; j < blk_end; j++) {
362             vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
363                 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
364                     impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] +
365                 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] *
366                     impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size];
367           }
368           for (n = 1; n < elem_size - 1; n++) {
369             for (CeedInt j = 0; j < blk_end; j++) {
370               vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
371                   uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] *
372                       impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] +
373                   uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
374                       impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] +
375                   uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] *
376                       impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size];
377             }
378           }
379           for (CeedInt j = 0; j < blk_end; j++) {
380             vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
381                 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] *
382                     impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] +
383                 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
384                     impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size];
385           }
386         }
387       }
388     } break;
389   }
390   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
391   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
392   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
393   return CEED_ERROR_SUCCESS;
394 }
395 
396 //------------------------------------------------------------------------------
397 // Core Unsigned ElemRestriction Apply Transpose Code
398 //------------------------------------------------------------------------------
399 static inline int CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
400                                                                      const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u,
401                                                                      CeedVector v, CeedRequest *request) {
402   Ceed ceed;
403   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
404   CeedElemRestriction_Ref *impl;
405   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
406   const CeedScalar *uu;
407   CeedScalar       *vv;
408   CeedInt           num_elem, elem_size, v_offset;
409   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
410   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
411   v_offset = start * blk_size * elem_size * num_comp;
412   CeedRestrictionType rstr_type;
413   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
414 
415   // Restriction from E-vector to L-vector
416   // Performing v += r^T * u
417   // uu has shape [elem_size, num_comp, num_elem], row-major
418   // vv has shape [nnodes, num_comp]
419   // Sum into for transpose mode
420   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
421   CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
422   switch (rstr_type) {
423     case CEED_RESTRICTION_ORIENTED: {
424       // Restriction with orientations
425       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
426         for (CeedInt k = 0; k < num_comp; k++) {
427           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
428             // Iteration bound set to discard padding elements
429             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
430               vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset];
431             }
432           }
433         }
434       }
435     } break;
436     case CEED_RESTRICTION_CURL_ORIENTED: {
437       // Restriction with tridiagonal transformation
438       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
439         for (CeedInt k = 0; k < num_comp; k++) {
440           // Iteration bound set to discard padding elements
441           CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0;
442           for (CeedInt j = 0; j < blk_end; j++) {
443             vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
444                 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
445                     abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) +
446                 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] *
447                     abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]);
448           }
449           for (n = 1; n < elem_size - 1; n++) {
450             for (CeedInt j = 0; j < blk_end; j++) {
451               vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
452                   uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] *
453                       abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) +
454                   uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
455                       abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) +
456                   uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] *
457                       abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]);
458             }
459           }
460           for (CeedInt j = 0; j < blk_end; j++) {
461             vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
462                 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] *
463                     abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) +
464                 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
465                     abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]);
466           }
467         }
468       }
469     } break;
470     // LCOV_EXCL_START
471     case CEED_RESTRICTION_STRIDED:
472       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported");
473     case CEED_RESTRICTION_DEFAULT:
474       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported");
475       // LCOV_EXCL_STOP
476   }
477   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
478   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
479   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
480   return CEED_ERROR_SUCCESS;
481 }
482 
483 //------------------------------------------------------------------------------
484 // Core Unoriented ElemRestriction Apply Transpose Code
485 //------------------------------------------------------------------------------
486 static inline int CeedElemRestrictionApplyUnorientedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
487                                                                        const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u,
488                                                                        CeedVector v, CeedRequest *request) {
489   Ceed ceed;
490   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
491   CeedElemRestriction_Ref *impl;
492   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
493   const CeedScalar *uu;
494   CeedScalar       *vv;
495   CeedInt           num_elem, elem_size, v_offset;
496   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
497   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
498   v_offset = start * blk_size * elem_size * num_comp;
499   CeedRestrictionType rstr_type;
500   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
501 
502   // Restriction from E-vector to L-vector
503   // Performing v += r^T * u
504   // uu has shape [elem_size, num_comp, num_elem], row-major
505   // vv has shape [nnodes, num_comp]
506   // Sum into for transpose mode
507   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
508   CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
509   switch (rstr_type) {
510     case CEED_RESTRICTION_CURL_ORIENTED: {
511       // Restriction with orientations
512       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
513         for (CeedInt k = 0; k < num_comp; k++) {
514           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
515             // Iteration bound set to discard padding elements
516             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
517               vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset];
518             }
519           }
520         }
521       }
522     } break;
523     // LCOV_EXCL_START
524     case CEED_RESTRICTION_STRIDED:
525       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported");
526     case CEED_RESTRICTION_DEFAULT:
527       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported");
528     case CEED_RESTRICTION_ORIENTED:
529       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_ORIENTED not supported");
530       // LCOV_EXCL_STOP
531   }
532   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
533   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
534   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
535   return CEED_ERROR_SUCCESS;
536 }
537 
538 //------------------------------------------------------------------------------
539 // ElemRestriction Apply - Common Sizes
540 //------------------------------------------------------------------------------
541 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
542                                                     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients,
543                                                     CeedVector u, CeedVector v, CeedRequest *request) {
544   CeedRestrictionType rstr_type;
545   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
546   if (t_mode == CEED_TRANSPOSE) {
547     switch (rstr_type) {
548       case CEED_RESTRICTION_STRIDED:
549       case CEED_RESTRICTION_DEFAULT:
550         return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
551       case CEED_RESTRICTION_ORIENTED:
552         if (use_signs) {
553           return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
554         } else {
555           return CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
556         }
557       case CEED_RESTRICTION_CURL_ORIENTED:
558         if (use_signs && use_orients) {
559           return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
560         } else if (use_orients) {
561           return CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
562         } else {
563           return CeedElemRestrictionApplyUnorientedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
564         }
565     }
566   } else {
567     switch (rstr_type) {
568       case CEED_RESTRICTION_STRIDED:
569       case CEED_RESTRICTION_DEFAULT:
570         return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
571       case CEED_RESTRICTION_ORIENTED:
572         if (use_signs) {
573           return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
574         } else {
575           return CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
576         }
577       case CEED_RESTRICTION_CURL_ORIENTED:
578         if (use_signs && use_orients) {
579           return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
580         } else if (use_orients) {
581           return CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
582         } else {
583           return CeedElemRestrictionApplyUnorientedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
584         }
585     }
586   }
587 }
588 
589 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
590                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
591                                             CeedVector v, CeedRequest *request) {
592   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
593 }
594 
595 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
596                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
597                                             CeedVector v, CeedRequest *request) {
598   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
599 }
600 
601 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
602                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
603                                             CeedVector v, CeedRequest *request) {
604   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
605 }
606 
607 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
608                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
609                                             CeedVector v, CeedRequest *request) {
610   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
611 }
612 
613 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
614                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
615                                             CeedVector v, CeedRequest *request) {
616   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
617 }
618 
619 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
620                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
621                                             CeedVector v, CeedRequest *request) {
622   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
623 }
624 
625 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
626                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
627                                             CeedVector v, CeedRequest *request) {
628   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
629 }
630 
631 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
632                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
633                                             CeedVector v, CeedRequest *request) {
634   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
635 }
636 
637 // LCOV_EXCL_START
638 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
639                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
640                                             CeedVector v, CeedRequest *request) {
641   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
642 }
643 // LCOV_EXCL_STOP
644 
645 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
646                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
647                                             CeedVector v, CeedRequest *request) {
648   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
649 }
650 
651 // LCOV_EXCL_START
652 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
653                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
654                                             CeedVector v, CeedRequest *request) {
655   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
656 }
657 // LCOV_EXCL_STOP
658 
659 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
660                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
661                                             CeedVector v, CeedRequest *request) {
662   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
663 }
664 
665 //------------------------------------------------------------------------------
666 // ElemRestriction Apply
667 //------------------------------------------------------------------------------
668 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
669   CeedInt num_blk, blk_size, num_comp, comp_stride;
670   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
671   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
672   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
673   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
674   CeedElemRestriction_Ref *impl;
675   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
676 
677   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, true, true, u, v, request);
678 }
679 
680 //------------------------------------------------------------------------------
681 // ElemRestriction Apply Unsigned
682 //------------------------------------------------------------------------------
683 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
684   CeedInt num_blk, blk_size, num_comp, comp_stride;
685   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
686   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
687   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
688   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
689   CeedElemRestriction_Ref *impl;
690   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
691 
692   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, true, u, v, request);
693 }
694 
695 //------------------------------------------------------------------------------
696 // ElemRestriction Apply Unoriented
697 //------------------------------------------------------------------------------
698 static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
699   CeedInt num_blk, blk_size, num_comp, comp_stride;
700   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
701   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
702   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
703   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
704   CeedElemRestriction_Ref *impl;
705   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
706 
707   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, false, u, v, request);
708 }
709 
710 //------------------------------------------------------------------------------
711 // ElemRestriction Apply Block
712 //------------------------------------------------------------------------------
713 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
714                                              CeedRequest *request) {
715   CeedInt blk_size, num_comp, comp_stride;
716   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
717   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
718   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
719   CeedElemRestriction_Ref *impl;
720   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
721 
722   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request);
723 }
724 
725 //------------------------------------------------------------------------------
726 // ElemRestriction Get Offsets
727 //------------------------------------------------------------------------------
728 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
729   CeedElemRestriction_Ref *impl;
730   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
731   Ceed ceed;
732   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
733 
734   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
735 
736   *offsets = impl->offsets;
737   return CEED_ERROR_SUCCESS;
738 }
739 
740 //------------------------------------------------------------------------------
741 // ElemRestriction Get Orientations
742 //------------------------------------------------------------------------------
743 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
744   CeedElemRestriction_Ref *impl;
745   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
746   Ceed ceed;
747   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
748 
749   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
750 
751   *orients = impl->orients;
752   return CEED_ERROR_SUCCESS;
753 }
754 
755 //------------------------------------------------------------------------------
756 // ElemRestriction Get Curl-Conforming Orientations
757 //------------------------------------------------------------------------------
758 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) {
759   CeedElemRestriction_Ref *impl;
760   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
761   Ceed ceed;
762   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
763 
764   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
765 
766   *curl_orients = impl->curl_orients;
767   return CEED_ERROR_SUCCESS;
768 }
769 
770 //------------------------------------------------------------------------------
771 // ElemRestriction Destroy
772 //------------------------------------------------------------------------------
773 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
774   CeedElemRestriction_Ref *impl;
775   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
776 
777   CeedCallBackend(CeedFree(&impl->offsets_allocated));
778   CeedCallBackend(CeedFree(&impl->orients_allocated));
779   CeedCallBackend(CeedFree(&impl->curl_orients_allocated));
780   CeedCallBackend(CeedFree(&impl));
781   return CEED_ERROR_SUCCESS;
782 }
783 
784 //------------------------------------------------------------------------------
785 // ElemRestriction Create
786 //------------------------------------------------------------------------------
787 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
788                                   const CeedInt8 *curl_orients, CeedElemRestriction r) {
789   CeedElemRestriction_Ref *impl;
790   CeedInt                  num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
791   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
792   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
793   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
794   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
795   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
796   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
797   Ceed ceed;
798   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
799 
800   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
801   CeedCallBackend(CeedCalloc(1, &impl));
802 
803   // Offsets data
804   CeedRestrictionType rstr_type;
805   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
806   if (rstr_type != CEED_RESTRICTION_STRIDED) {
807     // Check indices for ref or memcheck backends
808     Ceed parent_ceed = ceed, curr_ceed = NULL;
809     while (parent_ceed != curr_ceed) {
810       curr_ceed = parent_ceed;
811       CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed));
812     }
813     const char *resource;
814     CeedCallBackend(CeedGetResource(parent_ceed, &resource));
815     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") ||
816         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
817       CeedSize l_size;
818       CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size));
819 
820       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
821         CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
822                   "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
823       }
824     }
825 
826     // Copy data
827     switch (copy_mode) {
828       case CEED_COPY_VALUES:
829         CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated));
830         memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0]));
831         impl->offsets = impl->offsets_allocated;
832         break;
833       case CEED_OWN_POINTER:
834         impl->offsets_allocated = (CeedInt *)offsets;
835         impl->offsets           = impl->offsets_allocated;
836         break;
837       case CEED_USE_POINTER:
838         impl->offsets = offsets;
839     }
840 
841     // Orientation data
842     if (rstr_type == CEED_RESTRICTION_ORIENTED) {
843       CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction");
844       switch (copy_mode) {
845         case CEED_COPY_VALUES:
846           CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated));
847           memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0]));
848           impl->orients = impl->orients_allocated;
849           break;
850         case CEED_OWN_POINTER:
851           impl->orients_allocated = (bool *)orients;
852           impl->orients           = impl->orients_allocated;
853           break;
854         case CEED_USE_POINTER:
855           impl->orients = orients;
856       }
857     } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
858       CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction");
859       switch (copy_mode) {
860         case CEED_COPY_VALUES:
861           CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated));
862           memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0]));
863           impl->curl_orients = impl->curl_orients_allocated;
864           break;
865         case CEED_OWN_POINTER:
866           impl->curl_orients_allocated = (CeedInt8 *)curl_orients;
867           impl->curl_orients           = impl->curl_orients_allocated;
868           break;
869         case CEED_USE_POINTER:
870           impl->curl_orients = curl_orients;
871       }
872     }
873   }
874 
875   CeedCallBackend(CeedElemRestrictionSetData(r, impl));
876   CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
877   CeedCallBackend(CeedElemRestrictionSetELayout(r, layout));
878   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref));
879   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref));
880   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref));
881   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
882   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
883   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref));
884   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref));
885   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref));
886 
887   // Set apply function based upon num_comp, blk_size, and comp_stride
888   CeedInt idx = -1;
889   if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1);
890   switch (idx) {
891     case 110:
892       impl->Apply = CeedElemRestrictionApply_Ref_110;
893       break;
894     case 111:
895       impl->Apply = CeedElemRestrictionApply_Ref_111;
896       break;
897     case 180:
898       impl->Apply = CeedElemRestrictionApply_Ref_180;
899       break;
900     case 181:
901       impl->Apply = CeedElemRestrictionApply_Ref_181;
902       break;
903     case 310:
904       impl->Apply = CeedElemRestrictionApply_Ref_310;
905       break;
906     case 311:
907       impl->Apply = CeedElemRestrictionApply_Ref_311;
908       break;
909     case 380:
910       impl->Apply = CeedElemRestrictionApply_Ref_380;
911       break;
912     case 381:
913       impl->Apply = CeedElemRestrictionApply_Ref_381;
914       break;
915     // LCOV_EXCL_START
916     case 510:
917       impl->Apply = CeedElemRestrictionApply_Ref_510;
918       break;
919     // LCOV_EXCL_STOP
920     case 511:
921       impl->Apply = CeedElemRestrictionApply_Ref_511;
922       break;
923     // LCOV_EXCL_START
924     case 580:
925       impl->Apply = CeedElemRestrictionApply_Ref_580;
926       break;
927     // LCOV_EXCL_STOP
928     case 581:
929       impl->Apply = CeedElemRestrictionApply_Ref_581;
930       break;
931     default:
932       impl->Apply = CeedElemRestrictionApply_Ref_Core;
933       break;
934   }
935 
936   return CEED_ERROR_SUCCESS;
937 }
938 
939 //------------------------------------------------------------------------------
940