xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision 0c73c0392b237e966a4ce5108053a931350d7505)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <stdbool.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include "ceed-ref.h"
15 
16 //------------------------------------------------------------------------------
17 // Core ElemRestriction Apply Code
18 //------------------------------------------------------------------------------
19 static inline int CeedElemRestrictionApplyNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
20                                                                const CeedInt comp_stride, CeedInt start, CeedInt stop, bool use_orients, CeedVector u,
21                                                                CeedVector v, CeedRequest *request) {
22   Ceed ceed;
23   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
24   CeedElemRestriction_Ref *impl;
25   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
26   const CeedScalar *uu;
27   CeedScalar       *vv;
28   CeedInt           num_elem, elem_size, v_offset;
29   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
30   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
31   v_offset = start * blk_size * elem_size * num_comp;
32   CeedRestrictionType rstr_type;
33   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
34 
35   // Restriction from L-vector to E-vector
36   // Perform: v = r * u
37   // vv has shape [elem_size, num_comp, num_elem], row-major
38   // uu has shape [nnodes, num_comp]
39   // Overwrite for notranspose mode
40   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
41   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
42   switch (rstr_type) {
43     case CEED_RESTRICTION_STRIDED: {
44       // No offsets provided, Identity Restriction
45       bool has_backend_strides;
46       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
47       if (has_backend_strides) {
48         // CPU backend strides are {1, elem_size, elem_size*num_comp}
49         // This if branch is left separate to allow better inlining
50         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
51           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
52             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
53               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
54                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
55                     uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp];
56               }
57             }
58           }
59         }
60       } else {
61         // User provided strides
62         CeedInt strides[3];
63         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
64         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
65           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
66             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
67               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
68                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
69                     uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]];
70               }
71             }
72           }
73         }
74       }
75     } break;
76     case CEED_RESTRICTION_DEFAULT: {
77       // Default restriction with offsets
78       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
79         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
80           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
81             vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
82           }
83         }
84       }
85     } break;
86     case CEED_RESTRICTION_ORIENTED: {
87       // Restriction with orientations
88       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
89         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
90           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
91             vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] =
92                 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (use_orients && impl->orients[i + e * elem_size] ? -1.0 : 1.0);
93           }
94         }
95       }
96     } break;
97     case CEED_RESTRICTION_CURL_ORIENTED: {
98       // Restriction with tridiagonal transformation
99       CeedCheck(elem_size > 1, ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_CURL_ORIENTED should always have elem_size > 1");
100       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
101         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
102           CeedInt n = 0;
103           CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
104             vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
105                 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
106                     (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]
107                                  : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) +
108                 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] *
109                     (use_orients ? impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]
110                                  : abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]));
111           }
112           for (n = 1; n < elem_size - 1; n++) {
113             CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
114               vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
115                   uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] *
116                       (use_orients ? impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]
117                                    : abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size])) +
118                   uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
119                       (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]
120                                    : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) +
121                   uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] *
122                       (use_orients ? impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]
123                                    : abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]));
124             }
125           }
126           CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
127             vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
128                 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] *
129                     (use_orients ? impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]
130                                  : abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size])) +
131                 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
132                     (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]
133                                  : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]));
134           }
135         }
136       }
137     } break;
138   }
139   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
140   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
141   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
142   return CEED_ERROR_SUCCESS;
143 }
144 
145 //------------------------------------------------------------------------------
146 // Core ElemRestriction Apply Transpose Code
147 //------------------------------------------------------------------------------
148 static inline int CeedElemRestrictionApplyTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
149                                                              const CeedInt comp_stride, CeedInt start, CeedInt stop, bool use_orients, CeedVector u,
150                                                              CeedVector v, CeedRequest *request) {
151   Ceed ceed;
152   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
153   CeedElemRestriction_Ref *impl;
154   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
155   const CeedScalar *uu;
156   CeedScalar       *vv;
157   CeedInt           num_elem, elem_size, v_offset;
158   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
159   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
160   v_offset = start * blk_size * elem_size * num_comp;
161   CeedRestrictionType rstr_type;
162   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
163 
164   // Restriction from E-vector to L-vector
165   // Performing v += r^T * u
166   // uu has shape [elem_size, num_comp, num_elem], row-major
167   // vv has shape [nnodes, num_comp]
168   // Sum into for transpose mode
169   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
170   CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
171   switch (rstr_type) {
172     case CEED_RESTRICTION_STRIDED: {
173       // No offsets provided, Identity Restriction
174       bool has_backend_strides;
175       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
176       if (has_backend_strides) {
177         // CPU backend strides are {1, elem_size, elem_size*num_comp}
178         // This if brach is left separate to allow better inlining
179         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
180           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
181             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
182               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
183                 vv[n + k * elem_size + (e + j) * elem_size * num_comp] +=
184                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
185               }
186             }
187           }
188         }
189       } else {
190         // User provided strides
191         CeedInt strides[3];
192         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
193         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
194           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
195             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
196               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
197                 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
198                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
199               }
200             }
201           }
202         }
203       }
204     } break;
205     case CEED_RESTRICTION_DEFAULT: {
206       // Default restriction with offsets
207       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
208         for (CeedInt k = 0; k < num_comp; k++) {
209           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
210             // Iteration bound set to discard padding elements
211             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
212               vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset];
213             }
214           }
215         }
216       }
217     } break;
218     case CEED_RESTRICTION_ORIENTED: {
219       // Restriction with orientations
220       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
221         for (CeedInt k = 0; k < num_comp; k++) {
222           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
223             // Iteration bound set to discard padding elements
224             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
225               vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
226                   uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset] * (use_orients && impl->orients[j + e * elem_size] ? -1.0 : 1.0);
227             }
228           }
229         }
230       }
231     } break;
232     case CEED_RESTRICTION_CURL_ORIENTED: {
233       // Restriction with tridiagonal transformation
234       CeedCheck(elem_size > 1, ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_CURL_ORIENTED should always have elem_size > 1");
235       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
236         for (CeedInt k = 0; k < num_comp; k++) {
237           // Iteration bound set to discard padding elements
238           CeedInt    blk_end = CeedIntMin(blk_size, num_elem - e), n = 0;
239           CeedScalar vvn[blk_size];
240           for (CeedInt j = 0; j < blk_end; j++) {
241             vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
242                 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
243                     (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]
244                                  : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) +
245                 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] *
246                     (use_orients ? impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]
247                                  : abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]));
248           }
249           for (n = 1; n < elem_size - 1; n++) {
250             for (CeedInt j = 0; j < blk_end; j++) {
251               vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
252                   uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] *
253                       (use_orients ? impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]
254                                    : abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size])) +
255                   uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
256                       (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]
257                                    : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) +
258                   uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] *
259                       (use_orients ? impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]
260                                    : abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]));
261             }
262           }
263           for (CeedInt j = 0; j < blk_end; j++) {
264             vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
265                 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] *
266                     (use_orients ? impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]
267                                  : abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size])) +
268                 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
269                     (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]
270                                  : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]));
271           }
272         }
273       }
274     } break;
275   }
276   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
277   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
278   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
279   return CEED_ERROR_SUCCESS;
280 }
281 
282 //------------------------------------------------------------------------------
283 // ElemRestriction Apply - Common Sizes
284 //------------------------------------------------------------------------------
285 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
286                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
287                                             CeedRequest *request) {
288   if (t_mode == CEED_TRANSPOSE) {
289     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orients, u, v, request);
290   } else {
291     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orients, u, v, request);
292   }
293 }
294 
295 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
296                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
297                                             CeedRequest *request) {
298   if (t_mode == CEED_TRANSPOSE) {
299     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 1, 1, start, stop, use_orients, u, v, request);
300   } else {
301     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 1, 1, start, stop, use_orients, u, v, request);
302   }
303 }
304 
305 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
306                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
307                                             CeedRequest *request) {
308   if (t_mode == CEED_TRANSPOSE) {
309     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orients, u, v, request);
310   } else {
311     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orients, u, v, request);
312   }
313 }
314 
315 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
316                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
317                                             CeedRequest *request) {
318   if (t_mode == CEED_TRANSPOSE) {
319     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 8, 1, start, stop, use_orients, u, v, request);
320   } else {
321     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 8, 1, start, stop, use_orients, u, v, request);
322   }
323 }
324 
325 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
326                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
327                                             CeedRequest *request) {
328   if (t_mode == CEED_TRANSPOSE) {
329     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orients, u, v, request);
330   } else {
331     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orients, u, v, request);
332   }
333 }
334 
335 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
336                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
337                                             CeedRequest *request) {
338   if (t_mode == CEED_TRANSPOSE) {
339     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 1, 1, start, stop, use_orients, u, v, request);
340   } else {
341     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 1, 1, start, stop, use_orients, u, v, request);
342   }
343 }
344 
345 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
346                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
347                                             CeedRequest *request) {
348   if (t_mode == CEED_TRANSPOSE) {
349     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orients, u, v, request);
350   } else {
351     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orients, u, v, request);
352   }
353 }
354 
355 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
356                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
357                                             CeedRequest *request) {
358   if (t_mode == CEED_TRANSPOSE) {
359     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 8, 1, start, stop, use_orients, u, v, request);
360   } else {
361     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 8, 1, start, stop, use_orients, u, v, request);
362   }
363 }
364 
365 // LCOV_EXCL_START
366 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
367                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
368                                             CeedRequest *request) {
369   if (t_mode == CEED_TRANSPOSE) {
370     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orients, u, v, request);
371   } else {
372     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orients, u, v, request);
373   }
374 }
375 // LCOV_EXCL_STOP
376 
377 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
378                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
379                                             CeedRequest *request) {
380   if (t_mode == CEED_TRANSPOSE) {
381     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 1, 1, start, stop, use_orients, u, v, request);
382   } else {
383     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 1, 1, start, stop, use_orients, u, v, request);
384   }
385 }
386 
387 // LCOV_EXCL_START
388 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
389                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
390                                             CeedRequest *request) {
391   if (t_mode == CEED_TRANSPOSE) {
392     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orients, u, v, request);
393   } else {
394     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orients, u, v, request);
395   }
396 }
397 // LCOV_EXCL_STOP
398 
399 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
400                                             CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
401                                             CeedRequest *request) {
402   if (t_mode == CEED_TRANSPOSE) {
403     return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 8, 1, start, stop, use_orients, u, v, request);
404   } else {
405     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 8, 1, start, stop, use_orients, u, v, request);
406   }
407 }
408 
409 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
410                                                     CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u,
411                                                     CeedVector v, CeedRequest *request) {
412   if (t_mode == CEED_TRANSPOSE) {
413     return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, use_orients, u, v, request);
414   } else {
415     return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, use_orients, u, v, request);
416   }
417 }
418 
419 //------------------------------------------------------------------------------
420 // ElemRestriction Apply
421 //------------------------------------------------------------------------------
422 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
423   CeedInt num_blk, blk_size, num_comp, comp_stride;
424   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
425   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
426   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
427   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
428   CeedElemRestriction_Ref *impl;
429   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
430 
431   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, true, t_mode, u, v, request);
432 }
433 
434 //------------------------------------------------------------------------------
435 // ElemRestriction Apply Unsigned
436 //------------------------------------------------------------------------------
437 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
438   CeedInt num_blk, blk_size, num_comp, comp_stride;
439   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
440   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
441   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
442   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
443   CeedElemRestriction_Ref *impl;
444   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
445 
446   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, false, t_mode, u, v, request);
447 }
448 
449 //------------------------------------------------------------------------------
450 // ElemRestriction Apply Block
451 //------------------------------------------------------------------------------
452 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
453                                              CeedRequest *request) {
454   CeedInt blk_size, num_comp, comp_stride;
455   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
456   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
457   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
458   CeedElemRestriction_Ref *impl;
459   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
460 
461   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, true, t_mode, u, v, request);
462 }
463 
464 //------------------------------------------------------------------------------
465 // ElemRestriction Get Offsets
466 //------------------------------------------------------------------------------
467 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
468   CeedElemRestriction_Ref *impl;
469   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
470   Ceed ceed;
471   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
472 
473   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
474 
475   *offsets = impl->offsets;
476   return CEED_ERROR_SUCCESS;
477 }
478 
479 //------------------------------------------------------------------------------
480 // ElemRestriction Get Orientations
481 //------------------------------------------------------------------------------
482 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
483   CeedElemRestriction_Ref *impl;
484   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
485   Ceed ceed;
486   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
487 
488   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
489 
490   *orients = impl->orients;
491   return CEED_ERROR_SUCCESS;
492 }
493 
494 //------------------------------------------------------------------------------
495 // ElemRestriction Get Curl-Conforming Orientations
496 //------------------------------------------------------------------------------
497 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) {
498   CeedElemRestriction_Ref *impl;
499   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
500   Ceed ceed;
501   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
502 
503   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
504 
505   *curl_orients = impl->curl_orients;
506   return CEED_ERROR_SUCCESS;
507 }
508 
509 //------------------------------------------------------------------------------
510 // ElemRestriction Destroy
511 //------------------------------------------------------------------------------
512 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
513   CeedElemRestriction_Ref *impl;
514   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
515 
516   CeedCallBackend(CeedFree(&impl->offsets_allocated));
517   CeedCallBackend(CeedFree(&impl->orients_allocated));
518   CeedCallBackend(CeedFree(&impl->curl_orients_allocated));
519   CeedCallBackend(CeedFree(&impl));
520   return CEED_ERROR_SUCCESS;
521 }
522 
523 //------------------------------------------------------------------------------
524 // ElemRestriction Create
525 //------------------------------------------------------------------------------
526 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
527                                   const CeedInt8 *curl_orients, CeedElemRestriction r) {
528   CeedElemRestriction_Ref *impl;
529   CeedInt                  num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
530   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
531   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
532   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
533   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
534   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
535   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
536   Ceed ceed;
537   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
538 
539   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
540   CeedCallBackend(CeedCalloc(1, &impl));
541 
542   // Offsets data
543   CeedRestrictionType rstr_type;
544   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
545   if (rstr_type != CEED_RESTRICTION_STRIDED) {
546     // Check indices for ref or memcheck backends
547     Ceed parent_ceed = ceed, curr_ceed = NULL;
548     while (parent_ceed != curr_ceed) {
549       curr_ceed = parent_ceed;
550       CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed));
551     }
552     const char *resource;
553     CeedCallBackend(CeedGetResource(parent_ceed, &resource));
554     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") ||
555         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
556       CeedSize l_size;
557       CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size));
558 
559       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
560         CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
561                   "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
562       }
563     }
564 
565     // Copy data
566     switch (copy_mode) {
567       case CEED_COPY_VALUES:
568         CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated));
569         memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0]));
570         impl->offsets = impl->offsets_allocated;
571         break;
572       case CEED_OWN_POINTER:
573         impl->offsets_allocated = (CeedInt *)offsets;
574         impl->offsets           = impl->offsets_allocated;
575         break;
576       case CEED_USE_POINTER:
577         impl->offsets = offsets;
578     }
579 
580     // Orientation data
581     if (rstr_type == CEED_RESTRICTION_ORIENTED) {
582       CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction");
583       switch (copy_mode) {
584         case CEED_COPY_VALUES:
585           CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated));
586           memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0]));
587           impl->orients = impl->orients_allocated;
588           break;
589         case CEED_OWN_POINTER:
590           impl->orients_allocated = (bool *)orients;
591           impl->orients           = impl->orients_allocated;
592           break;
593         case CEED_USE_POINTER:
594           impl->orients = orients;
595       }
596     } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
597       CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction");
598       switch (copy_mode) {
599         case CEED_COPY_VALUES:
600           CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated));
601           memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0]));
602           impl->curl_orients = impl->curl_orients_allocated;
603           break;
604         case CEED_OWN_POINTER:
605           impl->curl_orients_allocated = (CeedInt8 *)curl_orients;
606           impl->curl_orients           = impl->curl_orients_allocated;
607           break;
608         case CEED_USE_POINTER:
609           impl->curl_orients = curl_orients;
610       }
611     }
612   }
613 
614   CeedCallBackend(CeedElemRestrictionSetData(r, impl));
615   CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
616   CeedCallBackend(CeedElemRestrictionSetELayout(r, layout));
617   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref));
618   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref));
619   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
620   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
621   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref));
622   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref));
623   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref));
624 
625   // Set apply function based upon num_comp, blk_size, and comp_stride
626   CeedInt idx = -1;
627   if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1);
628   switch (idx) {
629     case 110:
630       impl->Apply = CeedElemRestrictionApply_Ref_110;
631       break;
632     case 111:
633       impl->Apply = CeedElemRestrictionApply_Ref_111;
634       break;
635     case 180:
636       impl->Apply = CeedElemRestrictionApply_Ref_180;
637       break;
638     case 181:
639       impl->Apply = CeedElemRestrictionApply_Ref_181;
640       break;
641     case 310:
642       impl->Apply = CeedElemRestrictionApply_Ref_310;
643       break;
644     case 311:
645       impl->Apply = CeedElemRestrictionApply_Ref_311;
646       break;
647     case 380:
648       impl->Apply = CeedElemRestrictionApply_Ref_380;
649       break;
650     case 381:
651       impl->Apply = CeedElemRestrictionApply_Ref_381;
652       break;
653     // LCOV_EXCL_START
654     case 510:
655       impl->Apply = CeedElemRestrictionApply_Ref_510;
656       break;
657     // LCOV_EXCL_STOP
658     case 511:
659       impl->Apply = CeedElemRestrictionApply_Ref_511;
660       break;
661     // LCOV_EXCL_START
662     case 580:
663       impl->Apply = CeedElemRestrictionApply_Ref_580;
664       break;
665     // LCOV_EXCL_STOP
666     case 581:
667       impl->Apply = CeedElemRestrictionApply_Ref_581;
668       break;
669     default:
670       impl->Apply = CeedElemRestrictionApply_Ref_Core;
671       break;
672   }
673 
674   return CEED_ERROR_SUCCESS;
675 }
676 
677 //------------------------------------------------------------------------------
678