xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision 171d97d0be1a6a7597730b8cc2bec09fb6a98aac)
// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED:  http://github.com/ceed
721617c04Sjeremylt 
849aac155SJeremy L Thompson #include <ceed.h>
9ec3da8bcSJed Brown #include <ceed/backend.h>
103d576824SJeremy L Thompson #include <stdbool.h>
11fcbe8c06SSebastian Grimberg #include <stdlib.h>
123d576824SJeremy L Thompson #include <string.h>
132b730f8bSJeremy L Thompson 
1421617c04Sjeremylt #include "ceed-ref.h"
1521617c04Sjeremylt 
//------------------------------------------------------------------------------
// Core ElemRestriction Apply Code
//------------------------------------------------------------------------------
191cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
2094648b7dSSebastian Grimberg                                                                       CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
21*171d97d0SJeremy L Thompson                                                                       CeedSize v_offset, const CeedScalar *__restrict__ uu,
22eda0adbcSSebastian Grimberg                                                                       CeedScalar *__restrict__ vv) {
2394648b7dSSebastian Grimberg   // No offsets provided, identity restriction
24d1d35e2fSjeremylt   bool has_backend_strides;
25ad70ee2cSJeremy L Thompson 
261cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
27d1d35e2fSjeremylt   if (has_backend_strides) {
28d1d35e2fSjeremylt     // CPU backend strides are {1, elem_size, elem_size*num_comp}
297f90ec76Sjeremylt     // This if branch is left separate to allow better inlining
30ad70ee2cSJeremy L Thompson     for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
312b730f8bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
322b730f8bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
33ad70ee2cSJeremy L Thompson           CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
34*171d97d0SJeremy L Thompson             vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
35*171d97d0SJeremy L Thompson                 uu[n + k * (CeedSize)elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * (CeedSize)num_comp];
362b730f8bSJeremy L Thompson           }
372b730f8bSJeremy L Thompson         }
382b730f8bSJeremy L Thompson       }
392b730f8bSJeremy L Thompson     }
407f90ec76Sjeremylt   } else {
417f90ec76Sjeremylt     // User provided strides
427f90ec76Sjeremylt     CeedInt strides[3];
43ad70ee2cSJeremy L Thompson 
4456c48462SJeremy L Thompson     CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides));
45ad70ee2cSJeremy L Thompson     for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
462b730f8bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
472b730f8bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
48ad70ee2cSJeremy L Thompson           CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
49*171d97d0SJeremy L Thompson             vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
50*171d97d0SJeremy L Thompson                 uu[n * (CeedSize)strides[0] + k * (CeedSize)strides[1] + CeedIntMin(e + j, num_elem - 1) * (CeedSize)strides[2]];
512b730f8bSJeremy L Thompson           }
522b730f8bSJeremy L Thompson         }
532b730f8bSJeremy L Thompson       }
542b730f8bSJeremy L Thompson     }
557509a596Sjeremylt   }
5694648b7dSSebastian Grimberg   return CEED_ERROR_SUCCESS;
5794648b7dSSebastian Grimberg }
5894648b7dSSebastian Grimberg 
59eda0adbcSSebastian Grimberg static inline int CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
6094648b7dSSebastian Grimberg                                                                      const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
61*171d97d0SJeremy L Thompson                                                                      CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
62eda0adbcSSebastian Grimberg                                                                      CeedScalar *__restrict__ vv) {
63fcbe8c06SSebastian Grimberg   // Default restriction with offsets
6494648b7dSSebastian Grimberg   CeedElemRestriction_Ref *impl;
65ad70ee2cSJeremy L Thompson 
661cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
67ad70ee2cSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
682b730f8bSJeremy L Thompson     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
69ad70ee2cSJeremy L Thompson       CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) {
70*171d97d0SJeremy L Thompson         vv[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
71fcbe8c06SSebastian Grimberg       }
72fcbe8c06SSebastian Grimberg     }
73fcbe8c06SSebastian Grimberg   }
7494648b7dSSebastian Grimberg   return CEED_ERROR_SUCCESS;
7594648b7dSSebastian Grimberg }
7694648b7dSSebastian Grimberg 
771cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
7894648b7dSSebastian Grimberg                                                                        const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
79*171d97d0SJeremy L Thompson                                                                        CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
80eda0adbcSSebastian Grimberg                                                                        CeedScalar *__restrict__ vv) {
81fcbe8c06SSebastian Grimberg   // Restriction with orientations
8294648b7dSSebastian Grimberg   CeedElemRestriction_Ref *impl;
83ad70ee2cSJeremy L Thompson 
841cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
85ad70ee2cSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
86fcbe8c06SSebastian Grimberg     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
87ad70ee2cSJeremy L Thompson       CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) {
88*171d97d0SJeremy L Thompson         vv[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + i - v_offset] =
897c1dbaffSSebastian Grimberg             uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0);
90fcbe8c06SSebastian Grimberg       }
91fcbe8c06SSebastian Grimberg     }
92fcbe8c06SSebastian Grimberg   }
9394648b7dSSebastian Grimberg   return CEED_ERROR_SUCCESS;
9494648b7dSSebastian Grimberg }
9594648b7dSSebastian Grimberg 
961cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
9794648b7dSSebastian Grimberg                                                                            const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
98*171d97d0SJeremy L Thompson                                                                            CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
99eda0adbcSSebastian Grimberg                                                                            CeedScalar *__restrict__ vv) {
10077d1c127SSebastian Grimberg   // Restriction with tridiagonal transformation
10194648b7dSSebastian Grimberg   CeedElemRestriction_Ref *impl;
102ad70ee2cSJeremy L Thompson 
1031cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
104ad70ee2cSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
105fcbe8c06SSebastian Grimberg     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
1060c73c039SSebastian Grimberg       CeedInt n = 0;
1075c7e0f51SSebastian Grimberg 
108ad70ee2cSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
109*171d97d0SJeremy L Thompson         vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
110ad70ee2cSJeremy L Thompson             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
111ad70ee2cSJeremy L Thompson                 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
112ad70ee2cSJeremy L Thompson             uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
113ad70ee2cSJeremy L Thompson                 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
1140c73c039SSebastian Grimberg       }
1155c7e0f51SSebastian Grimberg       CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) {
116ad70ee2cSJeremy L Thompson         CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
117*171d97d0SJeremy L Thompson           vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
118ad70ee2cSJeremy L Thompson               uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
119ad70ee2cSJeremy L Thompson                   impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
120ad70ee2cSJeremy L Thompson               uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
121ad70ee2cSJeremy L Thompson                   impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
122ad70ee2cSJeremy L Thompson               uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
123ad70ee2cSJeremy L Thompson                   impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size];
1240c73c039SSebastian Grimberg         }
1250c73c039SSebastian Grimberg       }
126ad70ee2cSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
127*171d97d0SJeremy L Thompson         vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
128ad70ee2cSJeremy L Thompson             uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
129ad70ee2cSJeremy L Thompson                 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] +
130ad70ee2cSJeremy L Thompson             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
131ad70ee2cSJeremy L Thompson                 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
1322b730f8bSJeremy L Thompson       }
1332b730f8bSJeremy L Thompson     }
1342b730f8bSJeremy L Thompson   }
1350c73c039SSebastian Grimberg   return CEED_ERROR_SUCCESS;
136fcbe8c06SSebastian Grimberg }
1370c73c039SSebastian Grimberg 
1381cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp,
139ad70ee2cSJeremy L Thompson                                                                                    const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
14094648b7dSSebastian Grimberg                                                                                    CeedInt stop, CeedInt num_elem, CeedInt elem_size,
141*171d97d0SJeremy L Thompson                                                                                    CeedSize v_offset, const CeedScalar *__restrict__ uu,
142eda0adbcSSebastian Grimberg                                                                                    CeedScalar *__restrict__ vv) {
14394648b7dSSebastian Grimberg   // Restriction with (unsigned) tridiagonal transformation
1440c73c039SSebastian Grimberg   CeedElemRestriction_Ref *impl;
145ad70ee2cSJeremy L Thompson 
1461cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
147ad70ee2cSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
1487c1dbaffSSebastian Grimberg     CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
1497c1dbaffSSebastian Grimberg       CeedInt n = 0;
150ad70ee2cSJeremy L Thompson 
151ad70ee2cSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
152*171d97d0SJeremy L Thompson         vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
153ad70ee2cSJeremy L Thompson             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
154ad70ee2cSJeremy L Thompson                 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
155ad70ee2cSJeremy L Thompson             uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
156ad70ee2cSJeremy L Thompson                 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
1577c1dbaffSSebastian Grimberg       }
1585c7e0f51SSebastian Grimberg       CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) {
159ad70ee2cSJeremy L Thompson         CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
160*171d97d0SJeremy L Thompson           vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
161ad70ee2cSJeremy L Thompson               uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
162ad70ee2cSJeremy L Thompson                   abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
163ad70ee2cSJeremy L Thompson               uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
164ad70ee2cSJeremy L Thompson                   abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
165ad70ee2cSJeremy L Thompson               uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] *
166ad70ee2cSJeremy L Thompson                   abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]);
1677c1dbaffSSebastian Grimberg         }
1687c1dbaffSSebastian Grimberg       }
169ad70ee2cSJeremy L Thompson       CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) {
170*171d97d0SJeremy L Thompson         vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] =
171ad70ee2cSJeremy L Thompson             uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] *
172ad70ee2cSJeremy L Thompson                 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) +
173ad70ee2cSJeremy L Thompson             uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] *
174ad70ee2cSJeremy L Thompson                 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
1757c1dbaffSSebastian Grimberg       }
1767c1dbaffSSebastian Grimberg     }
1777c1dbaffSSebastian Grimberg   }
1787c1dbaffSSebastian Grimberg   return CEED_ERROR_SUCCESS;
1797c1dbaffSSebastian Grimberg }
1807c1dbaffSSebastian Grimberg 
1811cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
18294648b7dSSebastian Grimberg                                                                     CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
183*171d97d0SJeremy L Thompson                                                                     CeedSize v_offset, const CeedScalar *__restrict__ uu,
184eda0adbcSSebastian Grimberg                                                                     CeedScalar *__restrict__ vv) {
18594648b7dSSebastian Grimberg   // No offsets provided, identity restriction
186d1d35e2fSjeremylt   bool has_backend_strides;
187ad70ee2cSJeremy L Thompson 
1881cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
189d1d35e2fSjeremylt   if (has_backend_strides) {
190d1d35e2fSjeremylt     // CPU backend strides are {1, elem_size, elem_size*num_comp}
1917f90ec76Sjeremylt     // This if brach is left separate to allow better inlining
192ad70ee2cSJeremy L Thompson     for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
1932b730f8bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
1942b730f8bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
195ad70ee2cSJeremy L Thompson           CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) {
196*171d97d0SJeremy L Thompson             vv[n + k * (CeedSize)elem_size + (e + j) * elem_size * (CeedSize)num_comp] +=
197*171d97d0SJeremy L Thompson                 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset];
1982b730f8bSJeremy L Thompson           }
1992b730f8bSJeremy L Thompson         }
2002b730f8bSJeremy L Thompson       }
2012b730f8bSJeremy L Thompson     }
2027f90ec76Sjeremylt   } else {
2037f90ec76Sjeremylt     // User provided strides
2047f90ec76Sjeremylt     CeedInt strides[3];
205ad70ee2cSJeremy L Thompson 
20656c48462SJeremy L Thompson     CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides));
207ad70ee2cSJeremy L Thompson     for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
2082b730f8bSJeremy L Thompson       CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
2092b730f8bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
210ad70ee2cSJeremy L Thompson           CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) {
211*171d97d0SJeremy L Thompson             vv[n * (CeedSize)strides[0] + k * (CeedSize)strides[1] + (e + j) * (CeedSize)strides[2]] +=
212*171d97d0SJeremy L Thompson                 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset];
2132b730f8bSJeremy L Thompson           }
2142b730f8bSJeremy L Thompson         }
2152b730f8bSJeremy L Thompson       }
2162b730f8bSJeremy L Thompson     }
217523b8ea0Sjeremylt   }
21894648b7dSSebastian Grimberg   return CEED_ERROR_SUCCESS;
21994648b7dSSebastian Grimberg }
22094648b7dSSebastian Grimberg 
221eda0adbcSSebastian Grimberg static inline int CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
22294648b7dSSebastian Grimberg                                                                    const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
223*171d97d0SJeremy L Thompson                                                                    CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
224eda0adbcSSebastian Grimberg                                                                    CeedScalar *__restrict__ vv) {
225fcbe8c06SSebastian Grimberg   // Default restriction with offsets
22694648b7dSSebastian Grimberg   CeedElemRestriction_Ref *impl;
227ad70ee2cSJeremy L Thompson 
2281cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
229ad70ee2cSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
2302b730f8bSJeremy L Thompson     for (CeedInt k = 0; k < num_comp; k++) {
231ad70ee2cSJeremy L Thompson       for (CeedInt i = 0; i < elem_size * block_size; i += block_size) {
2328d94b059Sjeremylt         // Iteration bound set to discard padding elements
233ad70ee2cSJeremy L Thompson         for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
2345c7e0f51SSebastian Grimberg           CeedScalar vv_loc;
23558c07c4fSSebastian Grimberg 
236*171d97d0SJeremy L Thompson           vv_loc = uu[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + j - v_offset];
2375c7e0f51SSebastian Grimberg           CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
238fcbe8c06SSebastian Grimberg         }
239fcbe8c06SSebastian Grimberg       }
240fcbe8c06SSebastian Grimberg     }
241fcbe8c06SSebastian Grimberg   }
24294648b7dSSebastian Grimberg   return CEED_ERROR_SUCCESS;
24394648b7dSSebastian Grimberg }
24494648b7dSSebastian Grimberg 
2451cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
24694648b7dSSebastian Grimberg                                                                      const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
247*171d97d0SJeremy L Thompson                                                                      CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
248eda0adbcSSebastian Grimberg                                                                      CeedScalar *__restrict__ vv) {
249fcbe8c06SSebastian Grimberg   // Restriction with orientations
25094648b7dSSebastian Grimberg   CeedElemRestriction_Ref *impl;
251ad70ee2cSJeremy L Thompson 
2521cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
253ad70ee2cSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
254fcbe8c06SSebastian Grimberg     for (CeedInt k = 0; k < num_comp; k++) {
255ad70ee2cSJeremy L Thompson       for (CeedInt i = 0; i < elem_size * block_size; i += block_size) {
256fcbe8c06SSebastian Grimberg         // Iteration bound set to discard padding elements
257ad70ee2cSJeremy L Thompson         for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) {
2585c7e0f51SSebastian Grimberg           CeedScalar vv_loc;
25958c07c4fSSebastian Grimberg 
260*171d97d0SJeremy L Thompson           vv_loc =
261*171d97d0SJeremy L Thompson               uu[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0);
2625c7e0f51SSebastian Grimberg           CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc;
263fcbe8c06SSebastian Grimberg         }
264fcbe8c06SSebastian Grimberg       }
265fcbe8c06SSebastian Grimberg     }
266fcbe8c06SSebastian Grimberg   }
26794648b7dSSebastian Grimberg   return CEED_ERROR_SUCCESS;
26894648b7dSSebastian Grimberg }
26994648b7dSSebastian Grimberg 
2701cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
27194648b7dSSebastian Grimberg                                                                          const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
272*171d97d0SJeremy L Thompson                                                                          CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
273eda0adbcSSebastian Grimberg                                                                          CeedScalar *__restrict__ vv) {
27477d1c127SSebastian Grimberg   // Restriction with tridiagonal transformation
27594648b7dSSebastian Grimberg   CeedElemRestriction_Ref *impl;
2765c7e0f51SSebastian Grimberg   CeedScalar               vv_loc[block_size];
277ad70ee2cSJeremy L Thompson 
2781cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
279ad70ee2cSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
280fcbe8c06SSebastian Grimberg     for (CeedInt k = 0; k < num_comp; k++) {
281fcbe8c06SSebastian Grimberg       // Iteration bound set to discard padding elements
28258c07c4fSSebastian Grimberg       const CeedInt block_end = CeedIntMin(block_size, num_elem - e);
28358c07c4fSSebastian Grimberg       CeedInt       n         = 0;
28458c07c4fSSebastian Grimberg 
2855c7e0f51SSebastian Grimberg       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
286*171d97d0SJeremy L Thompson         vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
287ad70ee2cSJeremy L Thompson                         impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
288*171d97d0SJeremy L Thompson                     uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] *
289ad70ee2cSJeremy L Thompson                         impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
2905c7e0f51SSebastian Grimberg       }
2915c7e0f51SSebastian Grimberg       for (CeedInt j = 0; j < block_end; j++) {
2925c7e0f51SSebastian Grimberg         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
2930c73c039SSebastian Grimberg       }
2940c73c039SSebastian Grimberg       for (n = 1; n < elem_size - 1; n++) {
2955c7e0f51SSebastian Grimberg         CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
296*171d97d0SJeremy L Thompson           vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] *
297ad70ee2cSJeremy L Thompson                           impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
298*171d97d0SJeremy L Thompson                       uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
299ad70ee2cSJeremy L Thompson                           impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] +
300*171d97d0SJeremy L Thompson                       uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] *
301ad70ee2cSJeremy L Thompson                           impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size];
3020c73c039SSebastian Grimberg         }
303ad70ee2cSJeremy L Thompson         for (CeedInt j = 0; j < block_end; j++) {
3045c7e0f51SSebastian Grimberg           CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
3055c7e0f51SSebastian Grimberg         }
3065c7e0f51SSebastian Grimberg       }
3075c7e0f51SSebastian Grimberg       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
308*171d97d0SJeremy L Thompson         vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] *
309ad70ee2cSJeremy L Thompson                         impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] +
310*171d97d0SJeremy L Thompson                     uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
311ad70ee2cSJeremy L Thompson                         impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size];
3125c7e0f51SSebastian Grimberg       }
3135c7e0f51SSebastian Grimberg       for (CeedInt j = 0; j < block_end; j++) {
3145c7e0f51SSebastian Grimberg         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
31521617c04Sjeremylt       }
316b435c5a6Srezgarshakeri     }
3172b730f8bSJeremy L Thompson   }
318e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
31921617c04Sjeremylt }
32021617c04Sjeremylt 
3211cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp,
322ad70ee2cSJeremy L Thompson                                                                                  const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
323*171d97d0SJeremy L Thompson                                                                                  CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset,
324eda0adbcSSebastian Grimberg                                                                                  const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
32594648b7dSSebastian Grimberg   // Restriction with (unsigned) tridiagonal transformation
3267c1dbaffSSebastian Grimberg   CeedElemRestriction_Ref *impl;
3275c7e0f51SSebastian Grimberg   CeedScalar               vv_loc[block_size];
328ad70ee2cSJeremy L Thompson 
3291cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
330ad70ee2cSJeremy L Thompson   for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) {
3317c1dbaffSSebastian Grimberg     for (CeedInt k = 0; k < num_comp; k++) {
3327c1dbaffSSebastian Grimberg       // Iteration bound set to discard padding elements
333ad70ee2cSJeremy L Thompson       const CeedInt block_end = CeedIntMin(block_size, num_elem - e);
33458c07c4fSSebastian Grimberg       CeedInt       n         = 0;
335ad70ee2cSJeremy L Thompson 
3365c7e0f51SSebastian Grimberg       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
337*171d97d0SJeremy L Thompson         vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
338ad70ee2cSJeremy L Thompson                         abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
339*171d97d0SJeremy L Thompson                     uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] *
340ad70ee2cSJeremy L Thompson                         abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
3415c7e0f51SSebastian Grimberg       }
3425c7e0f51SSebastian Grimberg       for (CeedInt j = 0; j < block_end; j++) {
3435c7e0f51SSebastian Grimberg         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
3447c1dbaffSSebastian Grimberg       }
3457c1dbaffSSebastian Grimberg       for (n = 1; n < elem_size - 1; n++) {
3465c7e0f51SSebastian Grimberg         CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
347*171d97d0SJeremy L Thompson           vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] *
348ad70ee2cSJeremy L Thompson                           abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
349*171d97d0SJeremy L Thompson                       uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
350ad70ee2cSJeremy L Thompson                           abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) +
351*171d97d0SJeremy L Thompson                       uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] *
352ad70ee2cSJeremy L Thompson                           abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]);
3537c1dbaffSSebastian Grimberg         }
354ad70ee2cSJeremy L Thompson         for (CeedInt j = 0; j < block_end; j++) {
3555c7e0f51SSebastian Grimberg           CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
3565c7e0f51SSebastian Grimberg         }
3575c7e0f51SSebastian Grimberg       }
3585c7e0f51SSebastian Grimberg       CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) {
359*171d97d0SJeremy L Thompson         vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] *
360ad70ee2cSJeremy L Thompson                         abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) +
361*171d97d0SJeremy L Thompson                     uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] *
362ad70ee2cSJeremy L Thompson                         abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]);
3635c7e0f51SSebastian Grimberg       }
3645c7e0f51SSebastian Grimberg       for (CeedInt j = 0; j < block_end; j++) {
3655c7e0f51SSebastian Grimberg         CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j];
3667c1dbaffSSebastian Grimberg       }
3677c1dbaffSSebastian Grimberg     }
3687c1dbaffSSebastian Grimberg   }
3697c1dbaffSSebastian Grimberg   return CEED_ERROR_SUCCESS;
3707c1dbaffSSebastian Grimberg }
3717c1dbaffSSebastian Grimberg 
3721249ccc5SJeremy L Thompson static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, CeedInt stop,
373eda0adbcSSebastian Grimberg                                                                      CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu,
374eda0adbcSSebastian Grimberg                                                                      CeedScalar *__restrict__ vv) {
375*171d97d0SJeremy L Thompson   CeedInt                  num_points, l_vec_offset;
376*171d97d0SJeremy L Thompson   CeedSize                 e_vec_offset = 0;
37705fa913cSJeremy L Thompson   CeedElemRestriction_Ref *impl;
37805fa913cSJeremy L Thompson 
37905fa913cSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
38005fa913cSJeremy L Thompson   for (CeedInt e = start; e < stop; e++) {
3810930e4e7SJeremy L Thompson     l_vec_offset = impl->offsets[e];
38205fa913cSJeremy L Thompson     CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points));
38305fa913cSJeremy L Thompson     if (t_mode == CEED_NOTRANSPOSE) {
38405fa913cSJeremy L Thompson       for (CeedInt i = 0; i < num_points; i++) {
385*171d97d0SJeremy L Thompson         for (CeedInt j = 0; j < num_comp; j++) vv[j * (CeedSize)num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j];
38605fa913cSJeremy L Thompson       }
38705fa913cSJeremy L Thompson     } else {
38805fa913cSJeremy L Thompson       for (CeedInt i = 0; i < num_points; i++) {
389*171d97d0SJeremy L Thompson         for (CeedInt j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] = uu[j * (CeedSize)num_points + i + e_vec_offset];
39005fa913cSJeremy L Thompson       }
39105fa913cSJeremy L Thompson     }
392*171d97d0SJeremy L Thompson     e_vec_offset += num_points * (CeedSize)num_comp;
39305fa913cSJeremy L Thompson   }
39405fa913cSJeremy L Thompson   return CEED_ERROR_SUCCESS;
39505fa913cSJeremy L Thompson }
39605fa913cSJeremy L Thompson 
3971cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
398ad70ee2cSJeremy L Thompson                                                     const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs,
399ad70ee2cSJeremy L Thompson                                                     bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) {
400*171d97d0SJeremy L Thompson   CeedInt             num_elem, elem_size;
401*171d97d0SJeremy L Thompson   CeedSize            v_offset = 0;
402ad70ee2cSJeremy L Thompson   CeedRestrictionType rstr_type;
4037c1dbaffSSebastian Grimberg   const CeedScalar   *uu;
4047c1dbaffSSebastian Grimberg   CeedScalar         *vv;
405ad70ee2cSJeremy L Thompson 
4061cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
4071cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
408*171d97d0SJeremy L Thompson   v_offset = start * block_size * elem_size * (CeedSize)num_comp;
4091cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
41094648b7dSSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
411ad70ee2cSJeremy L Thompson 
41294648b7dSSebastian Grimberg   if (t_mode == CEED_TRANSPOSE) {
41394648b7dSSebastian Grimberg     // Sum into for transpose mode, E-vector to L-vector
41494648b7dSSebastian Grimberg     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
41594648b7dSSebastian Grimberg   } else {
41694648b7dSSebastian Grimberg     // Overwrite for notranspose mode, L-vector to E-vector
41794648b7dSSebastian Grimberg     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
41894648b7dSSebastian Grimberg   }
41994648b7dSSebastian Grimberg   if (t_mode == CEED_TRANSPOSE) {
4207c1dbaffSSebastian Grimberg     // Restriction from E-vector to L-vector
4217c1dbaffSSebastian Grimberg     // Performing v += r^T * u
4227c1dbaffSSebastian Grimberg     // uu has shape [elem_size, num_comp, num_elem], row-major
4237c1dbaffSSebastian Grimberg     // vv has shape [nnodes, num_comp]
4247c1dbaffSSebastian Grimberg     // Sum into for transpose mode
4257c1dbaffSSebastian Grimberg     switch (rstr_type) {
4267c1dbaffSSebastian Grimberg       case CEED_RESTRICTION_STRIDED:
4275d10938bSJeremy L Thompson         CeedCallBackend(
4281cc2c60dSJeremy L Thompson             CeedElemRestrictionApplyStridedTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv));
42994648b7dSSebastian Grimberg         break;
43061a27d74SSebastian Grimberg       case CEED_RESTRICTION_STANDARD:
431eda0adbcSSebastian Grimberg         CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
4325d10938bSJeremy L Thompson                                                                          v_offset, uu, vv));
43394648b7dSSebastian Grimberg         break;
4347c1dbaffSSebastian Grimberg       case CEED_RESTRICTION_ORIENTED:
43594648b7dSSebastian Grimberg         if (use_signs) {
4361cc2c60dSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
4371cc2c60dSJeremy L Thompson                                                                              elem_size, v_offset, uu, vv));
43894648b7dSSebastian Grimberg         } else {
439eda0adbcSSebastian Grimberg           CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
440eda0adbcSSebastian Grimberg                                                                            v_offset, uu, vv));
44194648b7dSSebastian Grimberg         }
44294648b7dSSebastian Grimberg         break;
44394648b7dSSebastian Grimberg       case CEED_RESTRICTION_CURL_ORIENTED:
44494648b7dSSebastian Grimberg         if (use_signs && use_orients) {
4451cc2c60dSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
4465d10938bSJeremy L Thompson                                                                                  elem_size, v_offset, uu, vv));
44794648b7dSSebastian Grimberg         } else if (use_orients) {
4481cc2c60dSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop,
4491cc2c60dSJeremy L Thompson                                                                                          num_elem, elem_size, v_offset, uu, vv));
45094648b7dSSebastian Grimberg         } else {
451eda0adbcSSebastian Grimberg           CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
452eda0adbcSSebastian Grimberg                                                                            v_offset, uu, vv));
45394648b7dSSebastian Grimberg         }
45494648b7dSSebastian Grimberg         break;
4552c7e7413SJeremy L Thompson       case CEED_RESTRICTION_POINTS:
4561249ccc5SJeremy L Thompson         CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
4572c7e7413SJeremy L Thompson         break;
45894648b7dSSebastian Grimberg     }
45994648b7dSSebastian Grimberg   } else {
46094648b7dSSebastian Grimberg     // Restriction from L-vector to E-vector
46194648b7dSSebastian Grimberg     // Perform: v = r * u
46294648b7dSSebastian Grimberg     // vv has shape [elem_size, num_comp, num_elem], row-major
46394648b7dSSebastian Grimberg     // uu has shape [nnodes, num_comp]
46494648b7dSSebastian Grimberg     // Overwrite for notranspose mode
46594648b7dSSebastian Grimberg     switch (rstr_type) {
46694648b7dSSebastian Grimberg       case CEED_RESTRICTION_STRIDED:
4675d10938bSJeremy L Thompson         CeedCallBackend(
4681cc2c60dSJeremy L Thompson             CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv));
46994648b7dSSebastian Grimberg         break;
47061a27d74SSebastian Grimberg       case CEED_RESTRICTION_STANDARD:
471eda0adbcSSebastian Grimberg         CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size,
472eda0adbcSSebastian Grimberg                                                                            v_offset, uu, vv));
47394648b7dSSebastian Grimberg         break;
47494648b7dSSebastian Grimberg       case CEED_RESTRICTION_ORIENTED:
47594648b7dSSebastian Grimberg         if (use_signs) {
4761cc2c60dSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
4771cc2c60dSJeremy L Thompson                                                                                elem_size, v_offset, uu, vv));
47894648b7dSSebastian Grimberg         } else {
479eda0adbcSSebastian Grimberg           CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
4801cc2c60dSJeremy L Thompson                                                                              elem_size, v_offset, uu, vv));
48194648b7dSSebastian Grimberg         }
48294648b7dSSebastian Grimberg         break;
48394648b7dSSebastian Grimberg       case CEED_RESTRICTION_CURL_ORIENTED:
48494648b7dSSebastian Grimberg         if (use_signs && use_orients) {
4851cc2c60dSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
4865d10938bSJeremy L Thompson                                                                                    elem_size, v_offset, uu, vv));
48794648b7dSSebastian Grimberg         } else if (use_orients) {
4881cc2c60dSJeremy L Thompson           CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop,
4895d10938bSJeremy L Thompson                                                                                            num_elem, elem_size, v_offset, uu, vv));
49094648b7dSSebastian Grimberg         } else {
491eda0adbcSSebastian Grimberg           CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
4921cc2c60dSJeremy L Thompson                                                                              elem_size, v_offset, uu, vv));
49394648b7dSSebastian Grimberg         }
49494648b7dSSebastian Grimberg         break;
4952c7e7413SJeremy L Thompson       case CEED_RESTRICTION_POINTS:
4961249ccc5SJeremy L Thompson         CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
4972c7e7413SJeremy L Thompson         break;
49894648b7dSSebastian Grimberg     }
4997c1dbaffSSebastian Grimberg   }
5007c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
5017c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
5027c1dbaffSSebastian Grimberg   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
5037c1dbaffSSebastian Grimberg   return CEED_ERROR_SUCCESS;
5047c1dbaffSSebastian Grimberg }
5057c1dbaffSSebastian Grimberg 
5067c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
507f10650afSjeremylt // ElemRestriction Apply - Common Sizes
508f10650afSjeremylt //------------------------------------------------------------------------------
5091cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5107c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5117c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5121cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
513d979a051Sjeremylt }
514d979a051Sjeremylt 
5151cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5167c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5177c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5181cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
5194d2a38eeSjeremylt }
5204d2a38eeSjeremylt 
5211cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5227c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5237c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5241cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
5259c36149bSjeremylt }
5269c36149bSjeremylt 
5271cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5287c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5297c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5301cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
5319c36149bSjeremylt }
5329c36149bSjeremylt 
5331cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5347c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5357c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5361cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
537d979a051Sjeremylt }
538d979a051Sjeremylt 
5391cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5407c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5417c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5421cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
543d979a051Sjeremylt }
544d979a051Sjeremylt 
5451cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5467c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5477c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5481cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
549d979a051Sjeremylt }
550d979a051Sjeremylt 
5511cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5527c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5537c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5541cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
555d979a051Sjeremylt }
556d979a051Sjeremylt 
557bf4d1581Sjeremylt // LCOV_EXCL_START
5581cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5597c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5607c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5611cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
562d979a051Sjeremylt }
563bf4d1581Sjeremylt // LCOV_EXCL_STOP
564d979a051Sjeremylt 
5651cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5667c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5677c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5681cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
569d979a051Sjeremylt }
570d979a051Sjeremylt 
571bf4d1581Sjeremylt // LCOV_EXCL_START
5721cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5737c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5747c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5751cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
576d979a051Sjeremylt }
577bf4d1581Sjeremylt // LCOV_EXCL_STOP
578d979a051Sjeremylt 
5791cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride,
5807c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
5810c73c039SSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
5821cc2c60dSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
5834d2a38eeSjeremylt }
5844d2a38eeSjeremylt 
585f10650afSjeremylt //------------------------------------------------------------------------------
586f10650afSjeremylt // ElemRestriction Apply
587f10650afSjeremylt //------------------------------------------------------------------------------
5881cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
589ad70ee2cSJeremy L Thompson   CeedInt                  num_block, block_size, num_comp, comp_stride;
590ad70ee2cSJeremy L Thompson   CeedElemRestriction_Ref *impl;
591ad70ee2cSJeremy L Thompson 
5921cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
5931cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
5941cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
5951cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
5961cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
5971cc2c60dSJeremy L Thompson   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request));
5985d10938bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
599f30b1135SSebastian Grimberg }
600f30b1135SSebastian Grimberg 
601f30b1135SSebastian Grimberg //------------------------------------------------------------------------------
602f30b1135SSebastian Grimberg // ElemRestriction Apply Unsigned
603f30b1135SSebastian Grimberg //------------------------------------------------------------------------------
6041cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
6051cc2c60dSJeremy L Thompson                                                 CeedRequest *request) {
606ad70ee2cSJeremy L Thompson   CeedInt                  num_block, block_size, num_comp, comp_stride;
607ad70ee2cSJeremy L Thompson   CeedElemRestriction_Ref *impl;
608ad70ee2cSJeremy L Thompson 
6091cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
6101cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
6111cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
6121cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
6131cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
6141cc2c60dSJeremy L Thompson   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request));
6155d10938bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
6167c1dbaffSSebastian Grimberg }
6177c1dbaffSSebastian Grimberg 
6187c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
6197c1dbaffSSebastian Grimberg // ElemRestriction Apply Unoriented
6207c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
6211cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
6221cc2c60dSJeremy L Thompson                                                   CeedRequest *request) {
623ad70ee2cSJeremy L Thompson   CeedInt                  num_block, block_size, num_comp, comp_stride;
624ad70ee2cSJeremy L Thompson   CeedElemRestriction_Ref *impl;
625ad70ee2cSJeremy L Thompson 
6261cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
6271cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
6281cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
6291cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
6301cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
6311cc2c60dSJeremy L Thompson   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request));
6325d10938bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
6339c36149bSjeremylt }
634be9261b7Sjeremylt 
635f10650afSjeremylt //------------------------------------------------------------------------------
6362c7e7413SJeremy L Thompson // ElemRestriction Apply Points
6372c7e7413SJeremy L Thompson //------------------------------------------------------------------------------
638eda0adbcSSebastian Grimberg static int CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
6392c7e7413SJeremy L Thompson                                                          CeedRequest *request) {
64005fa913cSJeremy L Thompson   CeedInt                  num_comp;
6412c7e7413SJeremy L Thompson   CeedElemRestriction_Ref *impl;
6422c7e7413SJeremy L Thompson 
643eda0adbcSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
644eda0adbcSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
645eda0adbcSSebastian Grimberg   return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request);
6462c7e7413SJeremy L Thompson }
6472c7e7413SJeremy L Thompson 
6482c7e7413SJeremy L Thompson //------------------------------------------------------------------------------
649f10650afSjeremylt // ElemRestriction Apply Block
650f10650afSjeremylt //------------------------------------------------------------------------------
6511cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
652074cb416Sjeremylt                                              CeedRequest *request) {
653ad70ee2cSJeremy L Thompson   CeedInt                  block_size, num_comp, comp_stride;
654ad70ee2cSJeremy L Thompson   CeedElemRestriction_Ref *impl;
655ad70ee2cSJeremy L Thompson 
6561cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
6571cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
6581cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
6591cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
6601cc2c60dSJeremy L Thompson   CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request));
6615d10938bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
6629c36149bSjeremylt }
663be9261b7Sjeremylt 
664f10650afSjeremylt //------------------------------------------------------------------------------
665bd33150aSjeremylt // ElemRestriction Get Offsets
666bd33150aSjeremylt //------------------------------------------------------------------------------
6672b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
668ad70ee2cSJeremy L Thompson   CeedElemRestriction_Ref *impl;
669ad70ee2cSJeremy L Thompson 
670ad70ee2cSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
671bd33150aSjeremylt 
6726e536b99SJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
673bd33150aSjeremylt 
674bd33150aSjeremylt   *offsets = impl->offsets;
675e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
676bd33150aSjeremylt }
677bd33150aSjeremylt 
678bd33150aSjeremylt //------------------------------------------------------------------------------
67977d1c127SSebastian Grimberg // ElemRestriction Get Orientations
68077d1c127SSebastian Grimberg //------------------------------------------------------------------------------
68177d1c127SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
682ad70ee2cSJeremy L Thompson   CeedElemRestriction_Ref *impl;
683ad70ee2cSJeremy L Thompson 
684ad70ee2cSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
68577d1c127SSebastian Grimberg 
6866e536b99SJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
68777d1c127SSebastian Grimberg 
68877d1c127SSebastian Grimberg   *orients = impl->orients;
68977d1c127SSebastian Grimberg   return CEED_ERROR_SUCCESS;
69077d1c127SSebastian Grimberg }
69177d1c127SSebastian Grimberg 
69277d1c127SSebastian Grimberg //------------------------------------------------------------------------------
69377d1c127SSebastian Grimberg // ElemRestriction Get Curl-Conforming Orientations
69477d1c127SSebastian Grimberg //------------------------------------------------------------------------------
6950c73c039SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) {
696ad70ee2cSJeremy L Thompson   CeedElemRestriction_Ref *impl;
697ad70ee2cSJeremy L Thompson 
698ad70ee2cSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
69977d1c127SSebastian Grimberg 
7006e536b99SJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
70177d1c127SSebastian Grimberg 
70277d1c127SSebastian Grimberg   *curl_orients = impl->curl_orients;
70377d1c127SSebastian Grimberg   return CEED_ERROR_SUCCESS;
70477d1c127SSebastian Grimberg }
70577d1c127SSebastian Grimberg 
70677d1c127SSebastian Grimberg //------------------------------------------------------------------------------
707f10650afSjeremylt // ElemRestriction Destroy
708f10650afSjeremylt //------------------------------------------------------------------------------
7091cc2c60dSJeremy L Thompson static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr) {
710fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
71121617c04Sjeremylt 
7121cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
7132b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->offsets_allocated));
71477d1c127SSebastian Grimberg   CeedCallBackend(CeedFree(&impl->orients_allocated));
71577d1c127SSebastian Grimberg   CeedCallBackend(CeedFree(&impl->curl_orients_allocated));
7162b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
717e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
71821617c04Sjeremylt }
71921617c04Sjeremylt 
720f10650afSjeremylt //------------------------------------------------------------------------------
721f10650afSjeremylt // ElemRestriction Create
722f10650afSjeremylt //------------------------------------------------------------------------------
723fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
7241cc2c60dSJeremy L Thompson                                   const CeedInt8 *curl_orients, CeedElemRestriction rstr) {
725ad70ee2cSJeremy L Thompson   Ceed                     ceed;
72607d5dec1SJeremy L Thompson   CeedInt                  num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets;
727ad70ee2cSJeremy L Thompson   CeedRestrictionType      rstr_type;
72821617c04Sjeremylt   CeedElemRestriction_Ref *impl;
729ad70ee2cSJeremy L Thompson 
7301cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
7311cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
7321cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
7331cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
7341cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
7351cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
7361cc2c60dSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
73722eb1385SJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
73821617c04Sjeremylt 
7396574a04fSJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
740dce49693SSebastian Grimberg 
7412b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &impl));
742dce49693SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionSetData(rstr, impl));
74322eb1385SJeremy L Thompson 
74422eb1385SJeremy L Thompson   // Set layouts
74522eb1385SJeremy L Thompson   {
74622eb1385SJeremy L Thompson     bool    has_backend_strides;
74722eb1385SJeremy L Thompson     CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
74822eb1385SJeremy L Thompson 
749dce49693SSebastian Grimberg     CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout));
75022eb1385SJeremy L Thompson     if (rstr_type == CEED_RESTRICTION_STRIDED) {
75122eb1385SJeremy L Thompson       CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
75222eb1385SJeremy L Thompson       if (has_backend_strides) {
75322eb1385SJeremy L Thompson         CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout));
75422eb1385SJeremy L Thompson       }
75522eb1385SJeremy L Thompson     }
75622eb1385SJeremy L Thompson   }
7573661185eSjeremylt 
75892fe105eSJeremy L Thompson   // Offsets data
759fcbe8c06SSebastian Grimberg   if (rstr_type != CEED_RESTRICTION_STRIDED) {
7603661185eSjeremylt     const char *resource;
761ad70ee2cSJeremy L Thompson 
762ad70ee2cSJeremy L Thompson     // Check indices for ref or memcheck backends
76335aed383SJeremy L Thompson     {
76435aed383SJeremy L Thompson       Ceed current = ceed, parent = NULL;
76535aed383SJeremy L Thompson 
76635aed383SJeremy L Thompson       CeedCallBackend(CeedGetParent(current, &parent));
76735aed383SJeremy L Thompson       while (current != parent) {
76835aed383SJeremy L Thompson         current = parent;
76935aed383SJeremy L Thompson         CeedCallBackend(CeedGetParent(current, &parent));
77035aed383SJeremy L Thompson       }
77135aed383SJeremy L Thompson       CeedCallBackend(CeedGetResource(parent, &resource));
77235aed383SJeremy L Thompson     }
7732b730f8bSJeremy L Thompson     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") ||
774d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
775e79b91d9SJeremy L Thompson       CeedSize l_size;
7763661185eSjeremylt 
7771cc2c60dSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
7782b730f8bSJeremy L Thompson       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
7796574a04fSJeremy L Thompson         CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
7806574a04fSJeremy L Thompson                   "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
7812b730f8bSJeremy L Thompson       }
7822b730f8bSJeremy L Thompson     }
7833661185eSjeremylt 
78492fe105eSJeremy L Thompson     // Copy data
78507d5dec1SJeremy L Thompson     if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points));
78607d5dec1SJeremy L Thompson     num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size);
787d1d35e2fSjeremylt     switch (copy_mode) {
78821617c04Sjeremylt       case CEED_COPY_VALUES:
78907d5dec1SJeremy L Thompson         CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated));
79007d5dec1SJeremy L Thompson         memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0]));
791d979a051Sjeremylt         impl->offsets = impl->offsets_allocated;
79221617c04Sjeremylt         break;
79321617c04Sjeremylt       case CEED_OWN_POINTER:
794d979a051Sjeremylt         impl->offsets_allocated = (CeedInt *)offsets;
795d979a051Sjeremylt         impl->offsets           = impl->offsets_allocated;
79621617c04Sjeremylt         break;
79721617c04Sjeremylt       case CEED_USE_POINTER:
798d979a051Sjeremylt         impl->offsets = offsets;
79921617c04Sjeremylt     }
800fcbe8c06SSebastian Grimberg 
801fcbe8c06SSebastian Grimberg     // Orientation data
802fcbe8c06SSebastian Grimberg     if (rstr_type == CEED_RESTRICTION_ORIENTED) {
8030305e208SSebastian Grimberg       CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction");
804fcbe8c06SSebastian Grimberg       switch (copy_mode) {
805fcbe8c06SSebastian Grimberg         case CEED_COPY_VALUES:
80607d5dec1SJeremy L Thompson           CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated));
80707d5dec1SJeremy L Thompson           memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0]));
808fcbe8c06SSebastian Grimberg           impl->orients = impl->orients_allocated;
809fcbe8c06SSebastian Grimberg           break;
810fcbe8c06SSebastian Grimberg         case CEED_OWN_POINTER:
811fcbe8c06SSebastian Grimberg           impl->orients_allocated = (bool *)orients;
812fcbe8c06SSebastian Grimberg           impl->orients           = impl->orients_allocated;
813fcbe8c06SSebastian Grimberg           break;
814fcbe8c06SSebastian Grimberg         case CEED_USE_POINTER:
815fcbe8c06SSebastian Grimberg           impl->orients = orients;
816fcbe8c06SSebastian Grimberg       }
817fcbe8c06SSebastian Grimberg     } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
8180305e208SSebastian Grimberg       CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction");
819fcbe8c06SSebastian Grimberg       switch (copy_mode) {
820fcbe8c06SSebastian Grimberg         case CEED_COPY_VALUES:
82107d5dec1SJeremy L Thompson           CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated));
82207d5dec1SJeremy L Thompson           memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0]));
823fcbe8c06SSebastian Grimberg           impl->curl_orients = impl->curl_orients_allocated;
824fcbe8c06SSebastian Grimberg           break;
825fcbe8c06SSebastian Grimberg         case CEED_OWN_POINTER:
8260c73c039SSebastian Grimberg           impl->curl_orients_allocated = (CeedInt8 *)curl_orients;
827fcbe8c06SSebastian Grimberg           impl->curl_orients           = impl->curl_orients_allocated;
828fcbe8c06SSebastian Grimberg           break;
829fcbe8c06SSebastian Grimberg         case CEED_USE_POINTER:
830fcbe8c06SSebastian Grimberg           impl->curl_orients = curl_orients;
831fcbe8c06SSebastian Grimberg       }
832fcbe8c06SSebastian Grimberg     }
83392fe105eSJeremy L Thompson   }
834fe2413ffSjeremylt 
835ad70ee2cSJeremy L Thompson   // Set apply function based upon num_comp, block_size, and comp_stride
836ad70ee2cSJeremy L Thompson   CeedInt index = -1;
837ad70ee2cSJeremy L Thompson 
838ad70ee2cSJeremy L Thompson   if (block_size < 10) index = 100 * num_comp + 10 * block_size + (comp_stride == 1);
839ad70ee2cSJeremy L Thompson   switch (index) {
840d979a051Sjeremylt     case 110:
841d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_110;
842d979a051Sjeremylt       break;
843d979a051Sjeremylt     case 111:
844d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_111;
845d979a051Sjeremylt       break;
846d979a051Sjeremylt     case 180:
847d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_180;
848d979a051Sjeremylt       break;
849d979a051Sjeremylt     case 181:
850d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_181;
851d979a051Sjeremylt       break;
852d979a051Sjeremylt     case 310:
853d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_310;
854d979a051Sjeremylt       break;
855d979a051Sjeremylt     case 311:
856d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_311;
857d979a051Sjeremylt       break;
858d979a051Sjeremylt     case 380:
859d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_380;
860d979a051Sjeremylt       break;
861d979a051Sjeremylt     case 381:
862d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_381;
863d979a051Sjeremylt       break;
864bf4d1581Sjeremylt     // LCOV_EXCL_START
865d979a051Sjeremylt     case 510:
866d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_510;
867d979a051Sjeremylt       break;
868bf4d1581Sjeremylt     // LCOV_EXCL_STOP
869d979a051Sjeremylt     case 511:
870d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_511;
871d979a051Sjeremylt       break;
872bf4d1581Sjeremylt     // LCOV_EXCL_START
873d979a051Sjeremylt     case 580:
874d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_580;
875d979a051Sjeremylt       break;
876bf4d1581Sjeremylt     // LCOV_EXCL_STOP
877d979a051Sjeremylt     case 581:
878d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_581;
879d979a051Sjeremylt       break;
880d979a051Sjeremylt     default:
881d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_Core;
882d979a051Sjeremylt       break;
883d979a051Sjeremylt   }
884dce49693SSebastian Grimberg 
885dce49693SSebastian Grimberg   // Register backend functions
886dce49693SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Ref));
887dce49693SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref));
888dce49693SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref));
889dce49693SSebastian Grimberg   if (rstr_type == CEED_RESTRICTION_POINTS) {
890dce49693SSebastian Grimberg     CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Ref));
891dce49693SSebastian Grimberg   }
892dce49693SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
893dce49693SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
894dce49693SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Ref));
895dce49693SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref));
896dce49693SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Ref));
897e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
89821617c04Sjeremylt }
899fc0567d9Srezgarshakeri 
900fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
901