xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision fcbe8c069ccd22d836386af4823e4c7d84ee0a69)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <stdbool.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include "ceed-ref.h"
15 
16 //------------------------------------------------------------------------------
17 // Core ElemRestriction Apply Code
18 //------------------------------------------------------------------------------
19 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
20                                                     CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u,
21                                                     CeedVector v, CeedRequest *request) {
22   CeedElemRestriction_Ref *impl;
23   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
24   const CeedScalar *uu;
25   CeedScalar       *vv;
26   CeedInt           num_elem, elem_size, v_offset;
27   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
28   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
29   v_offset = start * blk_size * elem_size * num_comp;
30   CeedRestrictionType rstr_type;
31   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
32 
33   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
34   if (t_mode == CEED_TRANSPOSE) {
35     // Sum into for transpose mode, e-vec to l-vec
36     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
37   } else {
38     // Overwrite for notranspose mode, l-vec to e-vec
39     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
40   }
41   // Restriction from L-vector to E-vector
42   // Perform: v = r * u
43   // vv has shape [elem_size, num_comp, num_elem], row-major
44   // uu has shape [nnodes, num_comp]
45   if (t_mode == CEED_NOTRANSPOSE) {
46     switch (rstr_type) {
47       case CEED_RESTRICTION_STRIDED: {
48         // No offsets provided, Identity Restriction
49         bool has_backend_strides;
50         CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
51         if (has_backend_strides) {
52           // CPU backend strides are {1, elem_size, elem_size*num_comp}
53           // This if branch is left separate to allow better inlining
54           for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
55             CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
56               CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
57                 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
58                   vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
59                       uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp];
60                 }
61               }
62             }
63           }
64         } else {
65           // User provided strides
66           CeedInt strides[3];
67           CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
68           for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
69             CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
70               CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
71                 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
72                   vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
73                       uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]];
74                 }
75               }
76             }
77           }
78         }
79       } break;
80       case CEED_RESTRICTION_DEFAULT:
81         // Default restriction with offsets
82         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
83           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
84             CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
85               vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = uu[impl->offsets[i + elem_size * e] + k * comp_stride];
86             }
87           }
88         }
89         break;
90       case CEED_RESTRICTION_ORIENTED:
91         // Restriction with orientations
92         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
93           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
94             CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
95               vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
96                   uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (use_orients && impl->orients[i + elem_size * e] ? -1.0 : 1.0);
97             }
98           }
99         }
100         break;
101       case CEED_RESTRICTION_CURL_ORIENTED:
102         // Restriction with tridiagonal transformation
103         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
104           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
105             CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
106               CeedInt ii = i % elem_size;
107               if (use_orients) {
108                 if (ii == 0) {
109                   vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
110                       uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 1 + 3 * elem_size * e] +
111                       uu[impl->offsets[i + 1 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 2 + 3 * elem_size * e];
112                 } else if (ii == elem_size - 1) {
113                   vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
114                       uu[impl->offsets[i - 1 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 0 + 3 * elem_size * e] +
115                       uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 1 + 3 * elem_size * e];
116                 } else {
117                   vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
118                       uu[impl->offsets[i - 1 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 0 + 3 * elem_size * e] +
119                       uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 1 + 3 * elem_size * e] +
120                       uu[impl->offsets[i + 1 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 2 + 3 * elem_size * e];
121                 }
122               } else {
123                 if (ii == 0) {
124                   vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
125                       uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 1 + 3 * elem_size * e]) +
126                       uu[impl->offsets[i + 1 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 2 + 3 * elem_size * e]);
127                 } else if (ii == elem_size - 1) {
128                   vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
129                       uu[impl->offsets[i - 1 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 0 + 3 * elem_size * e]) +
130                       uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 1 + 3 * elem_size * e]);
131                 } else {
132                   vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
133                       uu[impl->offsets[i - 1 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 0 + 3 * elem_size * e]) +
134                       uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 1 + 3 * elem_size * e]) +
135                       uu[impl->offsets[i + 1 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 2 + 3 * elem_size * e]);
136                 }
137               }
138             }
139           }
140         }
141         break;
142     }
143   } else {
144     // Restriction from E-vector to L-vector
145     // Performing v += r^T * u
146     // uu has shape [elem_size, num_comp, num_elem]
147     // vv has shape [nnodes, num_comp]
148     switch (rstr_type) {
149       case CEED_RESTRICTION_STRIDED: {
150         // No offsets provided, Identity Restriction
151         bool has_backend_strides;
152         CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
153         if (has_backend_strides) {
154           // CPU backend strides are {1, elem_size, elem_size*num_comp}
155           // This if brach is left separate to allow better inlining
156           for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
157             CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
158               CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
159                 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
160                   vv[n + k * elem_size + (e + j) * elem_size * num_comp] +=
161                       uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
162                 }
163               }
164             }
165           }
166         } else {
167           // User provided strides
168           CeedInt strides[3];
169           CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
170           for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
171             CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
172               CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
173                 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
174                   vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
175                       uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
176                 }
177               }
178             }
179           }
180         }
181       } break;
182       case CEED_RESTRICTION_DEFAULT:
183         // Default restriction with offsets
184         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
185           for (CeedInt k = 0; k < num_comp; k++) {
186             for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
187               // Iteration bound set to discard padding elements
188               for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
189                 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset];
190               }
191             }
192           }
193         }
194         break;
195       case CEED_RESTRICTION_ORIENTED:
196         // Restriction with orientations
197         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
198           for (CeedInt k = 0; k < num_comp; k++) {
199             for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
200               // Iteration bound set to discard padding elements
201               for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
202                 vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
203                     uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (use_orients && impl->orients[j + e * elem_size] ? -1.0 : 1.0);
204               }
205             }
206           }
207         }
208         break;
209       case CEED_RESTRICTION_CURL_ORIENTED:
210         // Restriction with tridiagonal transformation
211         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
212           for (CeedInt k = 0; k < num_comp; k++) {
213             for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
214               // Iteration bound set to discard padding elements
215               for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
216                 CeedInt jj = j % elem_size;
217                 if (use_orients) {
218                   if (jj == 0) {
219                     vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
220                         uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] * impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size] +
221                         uu[elem_size * (k * blk_size + num_comp * e) + j + 1 - v_offset] * impl->curl_orients[(j + 1) * 3 + 0 + e * 3 * elem_size];
222                   } else if (jj == elem_size - 1) {
223                     vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
224                         uu[elem_size * (k * blk_size + num_comp * e) + j - 1 - v_offset] * impl->curl_orients[(j - 1) * 3 + 2 + e * 3 * elem_size] +
225                         uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] * impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size];
226                   } else {
227                     vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
228                         uu[elem_size * (k * blk_size + num_comp * e) + j - 1 - v_offset] * impl->curl_orients[(j - 1) * 3 + 2 + e * 3 * elem_size] +
229                         uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] * impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size] +
230                         uu[elem_size * (k * blk_size + num_comp * e) + j + 1 - v_offset] * impl->curl_orients[(j + 1) * 3 + 0 + e * 3 * elem_size];
231                   }
232                 } else {
233                   if (jj == 0) {
234                     vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] *
235                                                                                   abs(impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size]) +
236                                                                               uu[elem_size * (k * blk_size + num_comp * e) + j + 1 - v_offset] *
237                                                                                   abs(impl->curl_orients[(j + 1) * 3 + 0 + e * 3 * elem_size]);
238                   } else if (jj == elem_size - 1) {
239                     vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j - 1 - v_offset] *
240                                                                                   abs(impl->curl_orients[(j - 1) * 3 + 2 + e * 3 * elem_size]) +
241                                                                               uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] *
242                                                                                   abs(impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size]);
243                   } else {
244                     vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j - 1 - v_offset] *
245                                                                                   abs(impl->curl_orients[(j - 1) * 3 + 2 + e * 3 * elem_size]) +
246                                                                               uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] *
247                                                                                   abs(impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size]) +
248                                                                               uu[elem_size * (k * blk_size + num_comp * e) + j + 1 - v_offset] *
249                                                                                   abs(impl->curl_orients[(j + 1) * 3 + 0 + e * 3 * elem_size]);
250                   }
251                 }
252               }
253             }
254           }
255         }
256         break;
257     }
258   }
259   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
260   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
261   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
262   return CEED_ERROR_SUCCESS;
263 }
264 
265 //------------------------------------------------------------------------------
266 // ElemRestriction Apply - Common Sizes
267 //------------------------------------------------------------------------------
268 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
269                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
270                                             CeedRequest *request) {
271   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request);
272 }
273 
274 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
275                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
276                                             CeedRequest *request) {
277   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, use_orients, t_mode, u, v, request);
278 }
279 
280 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
281                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
282                                             CeedRequest *request) {
283   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request);
284 }
285 
286 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
287                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
288                                             CeedRequest *request) {
289   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, use_orients, t_mode, u, v, request);
290 }
291 
292 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
293                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
294                                             CeedRequest *request) {
295   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request);
296 }
297 
298 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
299                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
300                                             CeedRequest *request) {
301   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, use_orients, t_mode, u, v, request);
302 }
303 
304 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
305                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
306                                             CeedRequest *request) {
307   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request);
308 }
309 
310 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
311                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
312                                             CeedRequest *request) {
313   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, use_orients, t_mode, u, v, request);
314 }
315 
316 // LCOV_EXCL_START
317 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
318                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
319                                             CeedRequest *request) {
320   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request);
321 }
322 // LCOV_EXCL_STOP
323 
324 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
325                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
326                                             CeedRequest *request) {
327   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, use_orients, t_mode, u, v, request);
328 }
329 
330 // LCOV_EXCL_START
331 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
332                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
333                                             CeedRequest *request) {
334   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request);
335 }
336 // LCOV_EXCL_STOP
337 
338 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
339                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
340                                             CeedRequest *request) {
341   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, use_orients, t_mode, u, v, request);
342 }
343 
344 //------------------------------------------------------------------------------
345 // ElemRestriction Apply
346 //------------------------------------------------------------------------------
347 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
348   CeedInt num_blk, blk_size, num_comp, comp_stride;
349   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
350   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
351   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
352   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
353   CeedElemRestriction_Ref *impl;
354   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
355 
356   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, true, t_mode, u, v, request);
357 }
358 
359 //------------------------------------------------------------------------------
360 // ElemRestriction Apply Unsigned
361 //------------------------------------------------------------------------------
362 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
363   CeedInt num_blk, blk_size, num_comp, comp_stride;
364   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
365   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
366   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
367   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
368   CeedElemRestriction_Ref *impl;
369   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
370 
371   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, false, t_mode, u, v, request);
372 }
373 
374 //------------------------------------------------------------------------------
375 // ElemRestriction Apply Block
376 //------------------------------------------------------------------------------
377 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
378                                              CeedRequest *request) {
379   CeedInt blk_size, num_comp, comp_stride;
380   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
381   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
382   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
383   CeedElemRestriction_Ref *impl;
384   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
385 
386   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, true, t_mode, u, v, request);
387 }
388 
389 //------------------------------------------------------------------------------
390 // ElemRestriction Get Offsets
391 //------------------------------------------------------------------------------
392 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
393   CeedElemRestriction_Ref *impl;
394   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
395   Ceed ceed;
396   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
397 
398   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
399 
400   *offsets = impl->offsets;
401   return CEED_ERROR_SUCCESS;
402 }
403 
404 //------------------------------------------------------------------------------
405 // ElemRestriction Get Orientations
406 //------------------------------------------------------------------------------
407 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
408   CeedElemRestriction_Ref *impl;
409   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
410   Ceed ceed;
411   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
412 
413   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
414 
415   *orients = impl->orients;
416   return CEED_ERROR_SUCCESS;
417 }
418 
419 //------------------------------------------------------------------------------
420 // ElemRestriction Get Curl-Conforming Orientations
421 //------------------------------------------------------------------------------
422 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **curl_orients) {
423   CeedElemRestriction_Ref *impl;
424   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
425   Ceed ceed;
426   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
427 
428   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
429 
430   *curl_orients = impl->curl_orients;
431   return CEED_ERROR_SUCCESS;
432 }
433 
434 //------------------------------------------------------------------------------
435 // ElemRestriction Destroy
436 //------------------------------------------------------------------------------
437 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
438   CeedElemRestriction_Ref *impl;
439   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
440 
441   CeedCallBackend(CeedFree(&impl->offsets_allocated));
442   CeedCallBackend(CeedFree(&impl->orients_allocated));
443   CeedCallBackend(CeedFree(&impl->curl_orients_allocated));
444   CeedCallBackend(CeedFree(&impl));
445   return CEED_ERROR_SUCCESS;
446 }
447 
448 //------------------------------------------------------------------------------
449 // ElemRestriction Create
450 //------------------------------------------------------------------------------
451 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
452                                   const CeedInt *curl_orients, CeedElemRestriction r) {
453   CeedElemRestriction_Ref *impl;
454   CeedInt                  num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
455   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
456   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
457   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
458   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
459   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
460   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
461   Ceed ceed;
462   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
463 
464   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
465   CeedCallBackend(CeedCalloc(1, &impl));
466 
467   // Offsets data
468   CeedRestrictionType rstr_type;
469   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
470   if (rstr_type != CEED_RESTRICTION_STRIDED) {
471     // Check indices for ref or memcheck backends
472     Ceed parent_ceed = ceed, curr_ceed = NULL;
473     while (parent_ceed != curr_ceed) {
474       curr_ceed = parent_ceed;
475       CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed));
476     }
477     const char *resource;
478     CeedCallBackend(CeedGetResource(parent_ceed, &resource));
479     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") ||
480         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
481       CeedSize l_size;
482       CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size));
483 
484       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
485         CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
486                   "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
487       }
488     }
489 
490     // Copy data
491     switch (copy_mode) {
492       case CEED_COPY_VALUES:
493         CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated));
494         memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0]));
495         impl->offsets = impl->offsets_allocated;
496         break;
497       case CEED_OWN_POINTER:
498         impl->offsets_allocated = (CeedInt *)offsets;
499         impl->offsets           = impl->offsets_allocated;
500         break;
501       case CEED_USE_POINTER:
502         impl->offsets = offsets;
503     }
504 
505     // Orientation data
506     if (rstr_type == CEED_RESTRICTION_ORIENTED) {
507       switch (copy_mode) {
508         case CEED_COPY_VALUES:
509           CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated));
510           memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0]));
511           impl->orients = impl->orients_allocated;
512           break;
513         case CEED_OWN_POINTER:
514           impl->orients_allocated = (bool *)orients;
515           impl->orients           = impl->orients_allocated;
516           break;
517         case CEED_USE_POINTER:
518           impl->orients = orients;
519       }
520     } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
521       switch (copy_mode) {
522         case CEED_COPY_VALUES:
523           CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated));
524           memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0]));
525           impl->curl_orients = impl->curl_orients_allocated;
526           break;
527         case CEED_OWN_POINTER:
528           impl->curl_orients_allocated = (CeedInt *)curl_orients;
529           impl->curl_orients           = impl->curl_orients_allocated;
530           break;
531         case CEED_USE_POINTER:
532           impl->curl_orients = curl_orients;
533       }
534     }
535   }
536 
537   CeedCallBackend(CeedElemRestrictionSetData(r, impl));
538   CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
539   CeedCallBackend(CeedElemRestrictionSetELayout(r, layout));
540   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref));
541   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref));
542   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
543   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
544   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref));
545   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref));
546   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref));
547 
548   // Set apply function based upon num_comp, blk_size, and comp_stride
549   CeedInt idx = -1;
550   if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1);
551   switch (idx) {
552     case 110:
553       impl->Apply = CeedElemRestrictionApply_Ref_110;
554       break;
555     case 111:
556       impl->Apply = CeedElemRestrictionApply_Ref_111;
557       break;
558     case 180:
559       impl->Apply = CeedElemRestrictionApply_Ref_180;
560       break;
561     case 181:
562       impl->Apply = CeedElemRestrictionApply_Ref_181;
563       break;
564     case 310:
565       impl->Apply = CeedElemRestrictionApply_Ref_310;
566       break;
567     case 311:
568       impl->Apply = CeedElemRestrictionApply_Ref_311;
569       break;
570     case 380:
571       impl->Apply = CeedElemRestrictionApply_Ref_380;
572       break;
573     case 381:
574       impl->Apply = CeedElemRestrictionApply_Ref_381;
575       break;
576     // LCOV_EXCL_START
577     case 510:
578       impl->Apply = CeedElemRestrictionApply_Ref_510;
579       break;
580     // LCOV_EXCL_STOP
581     case 511:
582       impl->Apply = CeedElemRestrictionApply_Ref_511;
583       break;
584     // LCOV_EXCL_START
585     case 580:
586       impl->Apply = CeedElemRestrictionApply_Ref_580;
587       break;
588     // LCOV_EXCL_STOP
589     case 581:
590       impl->Apply = CeedElemRestrictionApply_Ref_581;
591       break;
592     default:
593       impl->Apply = CeedElemRestrictionApply_Ref_Core;
594       break;
595   }
596 
597   return CEED_ERROR_SUCCESS;
598 }
599 
600 //------------------------------------------------------------------------------
601