xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision bd403d52388e064913153593f96f75d758a54e86)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <stdbool.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include "ceed-ref.h"
15 
16 //------------------------------------------------------------------------------
17 // Core ElemRestriction Apply Code
18 //------------------------------------------------------------------------------
19 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
20                                                     CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u,
21                                                     CeedVector v, CeedRequest *request) {
22   CeedElemRestriction_Ref *impl;
23   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
24   const CeedScalar *uu;
25   CeedScalar       *vv;
26   CeedInt           num_elem, elem_size, v_offset;
27   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
28   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
29   v_offset = start * blk_size * elem_size * num_comp;
30   CeedRestrictionType rstr_type;
31   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
32 
33   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
34   if (t_mode == CEED_TRANSPOSE) {
35     // Sum into for transpose mode, e-vec to l-vec
36     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
37   } else {
38     // Overwrite for notranspose mode, l-vec to e-vec
39     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
40   }
41   if (t_mode == CEED_NOTRANSPOSE) {
42     // Restriction from L-vector to E-vector
43     // Perform: v = r * u
44     // vv has shape [elem_size, num_comp, num_elem], row-major
45     // uu has shape [nnodes, num_comp]
46     switch (rstr_type) {
47       case CEED_RESTRICTION_STRIDED: {
48         // No offsets provided, Identity Restriction
49         bool has_backend_strides;
50         CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
51         if (has_backend_strides) {
52           // CPU backend strides are {1, elem_size, elem_size*num_comp}
53           // This if branch is left separate to allow better inlining
54           for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
55             CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
56               CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
57                 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
58                   vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
59                       uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp];
60                 }
61               }
62             }
63           }
64         } else {
65           // User provided strides
66           CeedInt strides[3];
67           CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
68           for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
69             CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
70               CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
71                 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
72                   vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
73                       uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]];
74                 }
75               }
76             }
77           }
78         }
79       } break;
80       case CEED_RESTRICTION_DEFAULT:
81         // Default restriction with offsets
82         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
83           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
84             CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
85               vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = uu[impl->offsets[i + elem_size * e] + k * comp_stride];
86             }
87           }
88         }
89         break;
90       case CEED_RESTRICTION_ORIENTED:
91         // Restriction with orientations
92         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
93           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
94             CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
95               vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
96                   uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (use_orients && impl->orients[i + elem_size * e] ? -1.0 : 1.0);
97             }
98           }
99         }
100         break;
101       case CEED_RESTRICTION_CURL_ORIENTED:
102         // Restriction with tridiagonal transformation
103         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
104           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
105             CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
106               CeedInt ii = 3 * blk_size * (i / blk_size) + i % blk_size;
107               if (use_orients) {
108                 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
109                     uu[impl->offsets[i + 0 * blk_size + elem_size * e] + k * comp_stride] *
110                         impl->curl_orients[ii + 1 * blk_size + 3 * elem_size * e] +
111                     (i >= blk_size ? uu[impl->offsets[i - 1 * blk_size + elem_size * e] + k * comp_stride] *
112                                          impl->curl_orients[ii + 0 * blk_size + 3 * elem_size * e]
113                                    : 0.0) +
114                     (i < (elem_size - 1) * blk_size ? uu[impl->offsets[i + 1 * blk_size + elem_size * e] + k * comp_stride] *
115                                                           impl->curl_orients[ii + 2 * blk_size + 3 * elem_size * e]
116                                                     : 0.0);
117               } else {
118                 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
119                     uu[impl->offsets[i + 0 * blk_size + elem_size * e] + k * comp_stride] *
120                         abs(impl->curl_orients[ii + 1 * blk_size + 3 * elem_size * e]) +
121                     (i >= blk_size ? uu[impl->offsets[i - 1 * blk_size + elem_size * e] + k * comp_stride] *
122                                          abs(impl->curl_orients[ii + 0 * blk_size + 3 * elem_size * e])
123                                    : 0.0) +
124                     (i < (elem_size - 1) * blk_size ? uu[impl->offsets[i + 1 * blk_size + elem_size * e] + k * comp_stride] *
125                                                           abs(impl->curl_orients[ii + 2 * blk_size + 3 * elem_size * e])
126                                                     : 0.0);
127               }
128             }
129           }
130         }
131         break;
132     }
133   } else {
134     // Restriction from E-vector to L-vector
135     // Performing v += r^T * u
136     // uu has shape [elem_size, num_comp, num_elem], row-major
137     // vv has shape [nnodes, num_comp]
138     switch (rstr_type) {
139       case CEED_RESTRICTION_STRIDED: {
140         // No offsets provided, Identity Restriction
141         bool has_backend_strides;
142         CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
143         if (has_backend_strides) {
144           // CPU backend strides are {1, elem_size, elem_size*num_comp}
145           // This if brach is left separate to allow better inlining
146           for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
147             CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
148               CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
149                 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
150                   vv[n + k * elem_size + (e + j) * elem_size * num_comp] +=
151                       uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
152                 }
153               }
154             }
155           }
156         } else {
157           // User provided strides
158           CeedInt strides[3];
159           CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
160           for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
161             CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
162               CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
163                 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
164                   vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
165                       uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
166                 }
167               }
168             }
169           }
170         }
171       } break;
172       case CEED_RESTRICTION_DEFAULT:
173         // Default restriction with offsets
174         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
175           for (CeedInt k = 0; k < num_comp; k++) {
176             for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
177               // Iteration bound set to discard padding elements
178               for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
179                 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset];
180               }
181             }
182           }
183         }
184         break;
185       case CEED_RESTRICTION_ORIENTED:
186         // Restriction with orientations
187         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
188           for (CeedInt k = 0; k < num_comp; k++) {
189             for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
190               // Iteration bound set to discard padding elements
191               for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
192                 vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
193                     uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (use_orients && impl->orients[j + e * elem_size] ? -1.0 : 1.0);
194               }
195             }
196           }
197         }
198         break;
199       case CEED_RESTRICTION_CURL_ORIENTED:
200         // Restriction with tridiagonal transformation
201         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
202           for (CeedInt k = 0; k < num_comp; k++) {
203             for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
204               // Iteration bound set to discard padding elements
205               for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
206                 CeedInt jj = 3 * blk_size * (j / blk_size + 0) + j % blk_size;
207                 if (use_orients) {
208                   vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
209                       uu[elem_size * (k * blk_size + num_comp * e) + j + 0 * blk_size - v_offset] *
210                           impl->curl_orients[jj + 1 * blk_size + e * 3 * elem_size] +
211                       (j >= blk_size ? uu[elem_size * (k * blk_size + num_comp * e) + j - 1 * blk_size - v_offset] *
212                                            impl->curl_orients[jj - 1 * blk_size + e * 3 * elem_size]
213                                      : 0.0) +
214                       (j < (elem_size - 1) * blk_size ? uu[elem_size * (k * blk_size + num_comp * e) + j + 1 * blk_size - v_offset] *
215                                                             impl->curl_orients[jj + 3 * blk_size + e * 3 * elem_size]
216                                                       : 0.0);
217                 } else {
218                   vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
219                       uu[elem_size * (k * blk_size + num_comp * e) + j + 0 * blk_size - v_offset] *
220                           abs(impl->curl_orients[jj + 1 * blk_size + e * 3 * elem_size]) +
221                       (j >= blk_size ? uu[elem_size * (k * blk_size + num_comp * e) + j - 1 * blk_size - v_offset] *
222                                            abs(impl->curl_orients[jj - 1 * blk_size + e * 3 * elem_size])
223                                      : 0.0) +
224                       (j < (elem_size - 1) * blk_size ? uu[elem_size * (k * blk_size + num_comp * e) + j + 1 * blk_size - v_offset] *
225                                                             abs(impl->curl_orients[jj + 3 * blk_size + e * 3 * elem_size])
226                                                       : 0.0);
227                 }
228               }
229             }
230           }
231         }
232         break;
233     }
234   }
235   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
236   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
237   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
238   return CEED_ERROR_SUCCESS;
239 }
240 
241 //------------------------------------------------------------------------------
242 // ElemRestriction Apply - Common Sizes
243 //------------------------------------------------------------------------------
244 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
245                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
246                                             CeedRequest *request) {
247   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request);
248 }
249 
250 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
251                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
252                                             CeedRequest *request) {
253   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, use_orients, t_mode, u, v, request);
254 }
255 
256 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
257                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
258                                             CeedRequest *request) {
259   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request);
260 }
261 
262 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
263                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
264                                             CeedRequest *request) {
265   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, use_orients, t_mode, u, v, request);
266 }
267 
268 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
269                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
270                                             CeedRequest *request) {
271   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request);
272 }
273 
274 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
275                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
276                                             CeedRequest *request) {
277   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, use_orients, t_mode, u, v, request);
278 }
279 
280 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
281                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
282                                             CeedRequest *request) {
283   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request);
284 }
285 
286 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
287                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
288                                             CeedRequest *request) {
289   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, use_orients, t_mode, u, v, request);
290 }
291 
292 // LCOV_EXCL_START
293 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
294                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
295                                             CeedRequest *request) {
296   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request);
297 }
298 // LCOV_EXCL_STOP
299 
300 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
301                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
302                                             CeedRequest *request) {
303   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, use_orients, t_mode, u, v, request);
304 }
305 
306 // LCOV_EXCL_START
307 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
308                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
309                                             CeedRequest *request) {
310   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request);
311 }
312 // LCOV_EXCL_STOP
313 
314 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
315                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
316                                             CeedRequest *request) {
317   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, use_orients, t_mode, u, v, request);
318 }
319 
320 //------------------------------------------------------------------------------
321 // ElemRestriction Apply
322 //------------------------------------------------------------------------------
323 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
324   CeedInt num_blk, blk_size, num_comp, comp_stride;
325   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
326   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
327   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
328   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
329   CeedElemRestriction_Ref *impl;
330   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
331 
332   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, true, t_mode, u, v, request);
333 }
334 
335 //------------------------------------------------------------------------------
336 // ElemRestriction Apply Unsigned
337 //------------------------------------------------------------------------------
338 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
339   CeedInt num_blk, blk_size, num_comp, comp_stride;
340   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
341   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
342   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
343   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
344   CeedElemRestriction_Ref *impl;
345   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
346 
347   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, false, t_mode, u, v, request);
348 }
349 
350 //------------------------------------------------------------------------------
351 // ElemRestriction Apply Block
352 //------------------------------------------------------------------------------
353 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
354                                              CeedRequest *request) {
355   CeedInt blk_size, num_comp, comp_stride;
356   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
357   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
358   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
359   CeedElemRestriction_Ref *impl;
360   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
361 
362   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, true, t_mode, u, v, request);
363 }
364 
365 //------------------------------------------------------------------------------
366 // ElemRestriction Get Offsets
367 //------------------------------------------------------------------------------
368 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
369   CeedElemRestriction_Ref *impl;
370   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
371   Ceed ceed;
372   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
373 
374   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
375 
376   *offsets = impl->offsets;
377   return CEED_ERROR_SUCCESS;
378 }
379 
380 //------------------------------------------------------------------------------
381 // ElemRestriction Get Orientations
382 //------------------------------------------------------------------------------
383 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
384   CeedElemRestriction_Ref *impl;
385   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
386   Ceed ceed;
387   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
388 
389   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
390 
391   *orients = impl->orients;
392   return CEED_ERROR_SUCCESS;
393 }
394 
395 //------------------------------------------------------------------------------
396 // ElemRestriction Get Curl-Conforming Orientations
397 //------------------------------------------------------------------------------
398 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **curl_orients) {
399   CeedElemRestriction_Ref *impl;
400   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
401   Ceed ceed;
402   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
403 
404   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
405 
406   *curl_orients = impl->curl_orients;
407   return CEED_ERROR_SUCCESS;
408 }
409 
410 //------------------------------------------------------------------------------
411 // ElemRestriction Destroy
412 //------------------------------------------------------------------------------
413 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
414   CeedElemRestriction_Ref *impl;
415   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
416 
417   CeedCallBackend(CeedFree(&impl->offsets_allocated));
418   CeedCallBackend(CeedFree(&impl->orients_allocated));
419   CeedCallBackend(CeedFree(&impl->curl_orients_allocated));
420   CeedCallBackend(CeedFree(&impl));
421   return CEED_ERROR_SUCCESS;
422 }
423 
424 //------------------------------------------------------------------------------
425 // ElemRestriction Create
426 //------------------------------------------------------------------------------
427 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
428                                   const CeedInt *curl_orients, CeedElemRestriction r) {
429   CeedElemRestriction_Ref *impl;
430   CeedInt                  num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
431   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
432   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
433   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
434   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
435   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
436   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
437   Ceed ceed;
438   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
439 
440   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
441   CeedCallBackend(CeedCalloc(1, &impl));
442 
443   // Offsets data
444   CeedRestrictionType rstr_type;
445   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
446   if (rstr_type != CEED_RESTRICTION_STRIDED) {
447     // Check indices for ref or memcheck backends
448     Ceed parent_ceed = ceed, curr_ceed = NULL;
449     while (parent_ceed != curr_ceed) {
450       curr_ceed = parent_ceed;
451       CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed));
452     }
453     const char *resource;
454     CeedCallBackend(CeedGetResource(parent_ceed, &resource));
455     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") ||
456         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
457       CeedSize l_size;
458       CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size));
459 
460       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
461         CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
462                   "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
463       }
464     }
465 
466     // Copy data
467     switch (copy_mode) {
468       case CEED_COPY_VALUES:
469         CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated));
470         memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0]));
471         impl->offsets = impl->offsets_allocated;
472         break;
473       case CEED_OWN_POINTER:
474         impl->offsets_allocated = (CeedInt *)offsets;
475         impl->offsets           = impl->offsets_allocated;
476         break;
477       case CEED_USE_POINTER:
478         impl->offsets = offsets;
479     }
480 
481     // Orientation data
482     if (rstr_type == CEED_RESTRICTION_ORIENTED) {
483       CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction");
484       switch (copy_mode) {
485         case CEED_COPY_VALUES:
486           CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated));
487           memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0]));
488           impl->orients = impl->orients_allocated;
489           break;
490         case CEED_OWN_POINTER:
491           impl->orients_allocated = (bool *)orients;
492           impl->orients           = impl->orients_allocated;
493           break;
494         case CEED_USE_POINTER:
495           impl->orients = orients;
496       }
497     } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
498       CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction");
499       switch (copy_mode) {
500         case CEED_COPY_VALUES:
501           CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated));
502           memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0]));
503           impl->curl_orients = impl->curl_orients_allocated;
504           break;
505         case CEED_OWN_POINTER:
506           impl->curl_orients_allocated = (CeedInt *)curl_orients;
507           impl->curl_orients           = impl->curl_orients_allocated;
508           break;
509         case CEED_USE_POINTER:
510           impl->curl_orients = curl_orients;
511       }
512     }
513   }
514 
515   CeedCallBackend(CeedElemRestrictionSetData(r, impl));
516   CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
517   CeedCallBackend(CeedElemRestrictionSetELayout(r, layout));
518   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref));
519   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref));
520   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
521   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
522   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref));
523   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref));
524   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref));
525 
526   // Set apply function based upon num_comp, blk_size, and comp_stride
527   CeedInt idx = -1;
528   if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1);
529   switch (idx) {
530     case 110:
531       impl->Apply = CeedElemRestrictionApply_Ref_110;
532       break;
533     case 111:
534       impl->Apply = CeedElemRestrictionApply_Ref_111;
535       break;
536     case 180:
537       impl->Apply = CeedElemRestrictionApply_Ref_180;
538       break;
539     case 181:
540       impl->Apply = CeedElemRestrictionApply_Ref_181;
541       break;
542     case 310:
543       impl->Apply = CeedElemRestrictionApply_Ref_310;
544       break;
545     case 311:
546       impl->Apply = CeedElemRestrictionApply_Ref_311;
547       break;
548     case 380:
549       impl->Apply = CeedElemRestrictionApply_Ref_380;
550       break;
551     case 381:
552       impl->Apply = CeedElemRestrictionApply_Ref_381;
553       break;
554     // LCOV_EXCL_START
555     case 510:
556       impl->Apply = CeedElemRestrictionApply_Ref_510;
557       break;
558     // LCOV_EXCL_STOP
559     case 511:
560       impl->Apply = CeedElemRestrictionApply_Ref_511;
561       break;
562     // LCOV_EXCL_START
563     case 580:
564       impl->Apply = CeedElemRestrictionApply_Ref_580;
565       break;
566     // LCOV_EXCL_STOP
567     case 581:
568       impl->Apply = CeedElemRestrictionApply_Ref_581;
569       break;
570     default:
571       impl->Apply = CeedElemRestrictionApply_Ref_Core;
572       break;
573   }
574 
575   return CEED_ERROR_SUCCESS;
576 }
577 
578 //------------------------------------------------------------------------------
579