xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision 7c1dbaff56f8afaddfc574209b09713e25514e81)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
321617c04Sjeremylt //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
521617c04Sjeremylt //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
721617c04Sjeremylt 
849aac155SJeremy L Thompson #include <ceed.h>
9ec3da8bcSJed Brown #include <ceed/backend.h>
103d576824SJeremy L Thompson #include <stdbool.h>
11fcbe8c06SSebastian Grimberg #include <stdlib.h>
123d576824SJeremy L Thompson #include <string.h>
132b730f8bSJeremy L Thompson 
1421617c04Sjeremylt #include "ceed-ref.h"
1521617c04Sjeremylt 
16f10650afSjeremylt //------------------------------------------------------------------------------
17f10650afSjeremylt // Core ElemRestriction Apply Code
18f10650afSjeremylt //------------------------------------------------------------------------------
190c73c039SSebastian Grimberg static inline int CeedElemRestrictionApplyNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
20*7c1dbaffSSebastian Grimberg                                                                const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, CeedVector v,
21*7c1dbaffSSebastian Grimberg                                                                CeedRequest *request) {
224ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
232b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
2421617c04Sjeremylt   const CeedScalar *uu;
2521617c04Sjeremylt   CeedScalar       *vv;
26d1d35e2fSjeremylt   CeedInt           num_elem, elem_size, v_offset;
272b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
282b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
29d1d35e2fSjeremylt   v_offset = start * blk_size * elem_size * num_comp;
30fcbe8c06SSebastian Grimberg   CeedRestrictionType rstr_type;
31fcbe8c06SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
3221617c04Sjeremylt 
337f90ec76Sjeremylt   // Restriction from L-vector to E-vector
3421617c04Sjeremylt   // Perform: v = r * u
35fcbe8c06SSebastian Grimberg   // vv has shape [elem_size, num_comp, num_elem], row-major
36fcbe8c06SSebastian Grimberg   // uu has shape [nnodes, num_comp]
370c73c039SSebastian Grimberg   // Overwrite for notranspose mode
380c73c039SSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
390c73c039SSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
40fcbe8c06SSebastian Grimberg   switch (rstr_type) {
41fcbe8c06SSebastian Grimberg     case CEED_RESTRICTION_STRIDED: {
42d979a051Sjeremylt       // No offsets provided, Identity Restriction
43d1d35e2fSjeremylt       bool has_backend_strides;
442b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
45d1d35e2fSjeremylt       if (has_backend_strides) {
46d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
477f90ec76Sjeremylt         // This if branch is left separate to allow better inlining
482b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
492b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
502b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
512b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
522b730f8bSJeremy L Thompson                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
532b730f8bSJeremy L Thompson                     uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp];
542b730f8bSJeremy L Thompson               }
552b730f8bSJeremy L Thompson             }
562b730f8bSJeremy L Thompson           }
572b730f8bSJeremy L Thompson         }
587f90ec76Sjeremylt       } else {
597f90ec76Sjeremylt         // User provided strides
607f90ec76Sjeremylt         CeedInt strides[3];
612b730f8bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
622b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
632b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
642b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
652b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
662b730f8bSJeremy L Thompson                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
672b730f8bSJeremy L Thompson                     uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]];
682b730f8bSJeremy L Thompson               }
692b730f8bSJeremy L Thompson             }
702b730f8bSJeremy L Thompson           }
712b730f8bSJeremy L Thompson         }
727509a596Sjeremylt       }
73fcbe8c06SSebastian Grimberg     } break;
740c73c039SSebastian Grimberg     case CEED_RESTRICTION_DEFAULT: {
75fcbe8c06SSebastian Grimberg       // Default restriction with offsets
762b730f8bSJeremy L Thompson       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
772b730f8bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
782b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
790c73c039SSebastian Grimberg             vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
80fcbe8c06SSebastian Grimberg           }
81fcbe8c06SSebastian Grimberg         }
82fcbe8c06SSebastian Grimberg       }
830c73c039SSebastian Grimberg     } break;
840c73c039SSebastian Grimberg     case CEED_RESTRICTION_ORIENTED: {
85fcbe8c06SSebastian Grimberg       // Restriction with orientations
86fcbe8c06SSebastian Grimberg       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
87fcbe8c06SSebastian Grimberg         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
88fcbe8c06SSebastian Grimberg           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
890c73c039SSebastian Grimberg             vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] =
90*7c1dbaffSSebastian Grimberg                 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0);
91fcbe8c06SSebastian Grimberg           }
92fcbe8c06SSebastian Grimberg         }
93fcbe8c06SSebastian Grimberg       }
940c73c039SSebastian Grimberg     } break;
950c73c039SSebastian Grimberg     case CEED_RESTRICTION_CURL_ORIENTED: {
9677d1c127SSebastian Grimberg       // Restriction with tridiagonal transformation
97fcbe8c06SSebastian Grimberg       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
98fcbe8c06SSebastian Grimberg         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
990c73c039SSebastian Grimberg           CeedInt n = 0;
1000c73c039SSebastian Grimberg           CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
1010c73c039SSebastian Grimberg             vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
1020c73c039SSebastian Grimberg                 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
103*7c1dbaffSSebastian Grimberg                     impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] +
1040c73c039SSebastian Grimberg                 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] *
105*7c1dbaffSSebastian Grimberg                     impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size];
1060c73c039SSebastian Grimberg           }
1070c73c039SSebastian Grimberg           for (n = 1; n < elem_size - 1; n++) {
1080c73c039SSebastian Grimberg             CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
1090c73c039SSebastian Grimberg               vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
1100c73c039SSebastian Grimberg                   uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] *
111*7c1dbaffSSebastian Grimberg                       impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] +
1120c73c039SSebastian Grimberg                   uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
113*7c1dbaffSSebastian Grimberg                       impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] +
1140c73c039SSebastian Grimberg                   uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] *
115*7c1dbaffSSebastian Grimberg                       impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size];
1160c73c039SSebastian Grimberg             }
1170c73c039SSebastian Grimberg           }
1180c73c039SSebastian Grimberg           CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
1190c73c039SSebastian Grimberg             vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
1200c73c039SSebastian Grimberg                 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] *
121*7c1dbaffSSebastian Grimberg                     impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] +
1220c73c039SSebastian Grimberg                 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
123*7c1dbaffSSebastian Grimberg                     impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size];
1242b730f8bSJeremy L Thompson           }
1252b730f8bSJeremy L Thompson         }
1262b730f8bSJeremy L Thompson       }
1270c73c039SSebastian Grimberg     } break;
128b435c5a6Srezgarshakeri   }
1290c73c039SSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
1300c73c039SSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
1310c73c039SSebastian Grimberg   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
1320c73c039SSebastian Grimberg   return CEED_ERROR_SUCCESS;
133fcbe8c06SSebastian Grimberg }
1340c73c039SSebastian Grimberg 
1350c73c039SSebastian Grimberg //------------------------------------------------------------------------------
136*7c1dbaffSSebastian Grimberg // Core Unsigned ElemRestriction Apply Code
1370c73c039SSebastian Grimberg //------------------------------------------------------------------------------
138*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
139*7c1dbaffSSebastian Grimberg                                                                        const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u,
1400c73c039SSebastian Grimberg                                                                        CeedVector v, CeedRequest *request) {
1410c73c039SSebastian Grimberg   Ceed ceed;
1420c73c039SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
1430c73c039SSebastian Grimberg   CeedElemRestriction_Ref *impl;
1440c73c039SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
1450c73c039SSebastian Grimberg   const CeedScalar *uu;
1460c73c039SSebastian Grimberg   CeedScalar       *vv;
1470c73c039SSebastian Grimberg   CeedInt           num_elem, elem_size, v_offset;
1480c73c039SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
1490c73c039SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
1500c73c039SSebastian Grimberg   v_offset = start * blk_size * elem_size * num_comp;
1510c73c039SSebastian Grimberg   CeedRestrictionType rstr_type;
1520c73c039SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
1530c73c039SSebastian Grimberg 
154*7c1dbaffSSebastian Grimberg   // Restriction from L-vector to E-vector
155*7c1dbaffSSebastian Grimberg   // Perform: v = r * u
156*7c1dbaffSSebastian Grimberg   // vv has shape [elem_size, num_comp, num_elem], row-major
157*7c1dbaffSSebastian Grimberg   // uu has shape [nnodes, num_comp]
158*7c1dbaffSSebastian Grimberg   // Overwrite for notranspose mode
159*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
160*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
161*7c1dbaffSSebastian Grimberg   switch (rstr_type) {
162*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_ORIENTED: {
163*7c1dbaffSSebastian Grimberg       // Restriction with orientations
164*7c1dbaffSSebastian Grimberg       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
165*7c1dbaffSSebastian Grimberg         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
166*7c1dbaffSSebastian Grimberg           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
167*7c1dbaffSSebastian Grimberg             vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
168*7c1dbaffSSebastian Grimberg           }
169*7c1dbaffSSebastian Grimberg         }
170*7c1dbaffSSebastian Grimberg       }
171*7c1dbaffSSebastian Grimberg     } break;
172*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_CURL_ORIENTED: {
173*7c1dbaffSSebastian Grimberg       // Restriction with tridiagonal transformation
174*7c1dbaffSSebastian Grimberg       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
175*7c1dbaffSSebastian Grimberg         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
176*7c1dbaffSSebastian Grimberg           CeedInt n = 0;
177*7c1dbaffSSebastian Grimberg           CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
178*7c1dbaffSSebastian Grimberg             vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
179*7c1dbaffSSebastian Grimberg                 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
180*7c1dbaffSSebastian Grimberg                     abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) +
181*7c1dbaffSSebastian Grimberg                 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] *
182*7c1dbaffSSebastian Grimberg                     abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]);
183*7c1dbaffSSebastian Grimberg           }
184*7c1dbaffSSebastian Grimberg           for (n = 1; n < elem_size - 1; n++) {
185*7c1dbaffSSebastian Grimberg             CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
186*7c1dbaffSSebastian Grimberg               vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
187*7c1dbaffSSebastian Grimberg                   uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] *
188*7c1dbaffSSebastian Grimberg                       abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) +
189*7c1dbaffSSebastian Grimberg                   uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
190*7c1dbaffSSebastian Grimberg                       abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) +
191*7c1dbaffSSebastian Grimberg                   uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] *
192*7c1dbaffSSebastian Grimberg                       abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]);
193*7c1dbaffSSebastian Grimberg             }
194*7c1dbaffSSebastian Grimberg           }
195*7c1dbaffSSebastian Grimberg           CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
196*7c1dbaffSSebastian Grimberg             vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
197*7c1dbaffSSebastian Grimberg                 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] *
198*7c1dbaffSSebastian Grimberg                     abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) +
199*7c1dbaffSSebastian Grimberg                 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] *
200*7c1dbaffSSebastian Grimberg                     abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]);
201*7c1dbaffSSebastian Grimberg           }
202*7c1dbaffSSebastian Grimberg         }
203*7c1dbaffSSebastian Grimberg       }
204*7c1dbaffSSebastian Grimberg     } break;
205*7c1dbaffSSebastian Grimberg     // LCOV_EXCL_START
206*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_STRIDED:
207*7c1dbaffSSebastian Grimberg       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported");
208*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_DEFAULT:
209*7c1dbaffSSebastian Grimberg       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported");
210*7c1dbaffSSebastian Grimberg       // LCOV_EXCL_STOP
211*7c1dbaffSSebastian Grimberg   }
212*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
213*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
214*7c1dbaffSSebastian Grimberg   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
215*7c1dbaffSSebastian Grimberg   return CEED_ERROR_SUCCESS;
216*7c1dbaffSSebastian Grimberg }
217*7c1dbaffSSebastian Grimberg 
218*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
219*7c1dbaffSSebastian Grimberg // Core Unoriented ElemRestriction Apply Code
220*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
221*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApplyUnorientedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
222*7c1dbaffSSebastian Grimberg                                                                          const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u,
223*7c1dbaffSSebastian Grimberg                                                                          CeedVector v, CeedRequest *request) {
224*7c1dbaffSSebastian Grimberg   Ceed ceed;
225*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
226*7c1dbaffSSebastian Grimberg   CeedElemRestriction_Ref *impl;
227*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
228*7c1dbaffSSebastian Grimberg   const CeedScalar *uu;
229*7c1dbaffSSebastian Grimberg   CeedScalar       *vv;
230*7c1dbaffSSebastian Grimberg   CeedInt           num_elem, elem_size, v_offset;
231*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
232*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
233*7c1dbaffSSebastian Grimberg   v_offset = start * blk_size * elem_size * num_comp;
234*7c1dbaffSSebastian Grimberg   CeedRestrictionType rstr_type;
235*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
236*7c1dbaffSSebastian Grimberg 
237*7c1dbaffSSebastian Grimberg   // Restriction from L-vector to E-vector
238*7c1dbaffSSebastian Grimberg   // Perform: v = r * u
239*7c1dbaffSSebastian Grimberg   // vv has shape [elem_size, num_comp, num_elem], row-major
240*7c1dbaffSSebastian Grimberg   // uu has shape [nnodes, num_comp]
241*7c1dbaffSSebastian Grimberg   // Overwrite for notranspose mode
242*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
243*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
244*7c1dbaffSSebastian Grimberg   switch (rstr_type) {
245*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_CURL_ORIENTED: {
246*7c1dbaffSSebastian Grimberg       // Restriction with tridiagonal transformation
247*7c1dbaffSSebastian Grimberg       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
248*7c1dbaffSSebastian Grimberg         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
249*7c1dbaffSSebastian Grimberg           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
250*7c1dbaffSSebastian Grimberg             vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride];
251*7c1dbaffSSebastian Grimberg           }
252*7c1dbaffSSebastian Grimberg         }
253*7c1dbaffSSebastian Grimberg       }
254*7c1dbaffSSebastian Grimberg     } break;
255*7c1dbaffSSebastian Grimberg     // LCOV_EXCL_START
256*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_STRIDED:
257*7c1dbaffSSebastian Grimberg       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported");
258*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_DEFAULT:
259*7c1dbaffSSebastian Grimberg       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported");
260*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_ORIENTED:
261*7c1dbaffSSebastian Grimberg       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_ORIENTED not supported");
262*7c1dbaffSSebastian Grimberg       // LCOV_EXCL_STOP
263*7c1dbaffSSebastian Grimberg   }
264*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
265*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
266*7c1dbaffSSebastian Grimberg   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
267*7c1dbaffSSebastian Grimberg   return CEED_ERROR_SUCCESS;
268*7c1dbaffSSebastian Grimberg }
269*7c1dbaffSSebastian Grimberg 
270*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
271*7c1dbaffSSebastian Grimberg // Core ElemRestriction Apply Transpose Code
272*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
273*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApplyTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
274*7c1dbaffSSebastian Grimberg                                                              const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, CeedVector v,
275*7c1dbaffSSebastian Grimberg                                                              CeedRequest *request) {
276*7c1dbaffSSebastian Grimberg   CeedElemRestriction_Ref *impl;
277*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
278*7c1dbaffSSebastian Grimberg   const CeedScalar *uu;
279*7c1dbaffSSebastian Grimberg   CeedScalar       *vv;
280*7c1dbaffSSebastian Grimberg   CeedInt           num_elem, elem_size, v_offset;
281*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
282*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
283*7c1dbaffSSebastian Grimberg   v_offset = start * blk_size * elem_size * num_comp;
284*7c1dbaffSSebastian Grimberg   CeedRestrictionType rstr_type;
285*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
286*7c1dbaffSSebastian Grimberg 
2877f90ec76Sjeremylt   // Restriction from E-vector to L-vector
2888d94b059Sjeremylt   // Performing v += r^T * u
2899475e044SSebastian Grimberg   // uu has shape [elem_size, num_comp, num_elem], row-major
290fcbe8c06SSebastian Grimberg   // vv has shape [nnodes, num_comp]
2910c73c039SSebastian Grimberg   // Sum into for transpose mode
2920c73c039SSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
2930c73c039SSebastian Grimberg   CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
294fcbe8c06SSebastian Grimberg   switch (rstr_type) {
295fcbe8c06SSebastian Grimberg     case CEED_RESTRICTION_STRIDED: {
296d979a051Sjeremylt       // No offsets provided, Identity Restriction
297d1d35e2fSjeremylt       bool has_backend_strides;
2982b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
299d1d35e2fSjeremylt       if (has_backend_strides) {
300d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
3017f90ec76Sjeremylt         // This if brach is left separate to allow better inlining
3022b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
3032b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
3042b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
3052b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
3062b730f8bSJeremy L Thompson                 vv[n + k * elem_size + (e + j) * elem_size * num_comp] +=
3072b730f8bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
3082b730f8bSJeremy L Thompson               }
3092b730f8bSJeremy L Thompson             }
3102b730f8bSJeremy L Thompson           }
3112b730f8bSJeremy L Thompson         }
3127f90ec76Sjeremylt       } else {
3137f90ec76Sjeremylt         // User provided strides
3147f90ec76Sjeremylt         CeedInt strides[3];
3152b730f8bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
3162b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
3172b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
3182b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
3192b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
3202b730f8bSJeremy L Thompson                 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
3212b730f8bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
3222b730f8bSJeremy L Thompson               }
3232b730f8bSJeremy L Thompson             }
3242b730f8bSJeremy L Thompson           }
3252b730f8bSJeremy L Thompson         }
326523b8ea0Sjeremylt       }
327fcbe8c06SSebastian Grimberg     } break;
3280c73c039SSebastian Grimberg     case CEED_RESTRICTION_DEFAULT: {
329fcbe8c06SSebastian Grimberg       // Default restriction with offsets
3302b730f8bSJeremy L Thompson       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
3312b730f8bSJeremy L Thompson         for (CeedInt k = 0; k < num_comp; k++) {
3322b730f8bSJeremy L Thompson           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
3338d94b059Sjeremylt             // Iteration bound set to discard padding elements
3342b730f8bSJeremy L Thompson             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
3350c73c039SSebastian Grimberg               vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset];
336fcbe8c06SSebastian Grimberg             }
337fcbe8c06SSebastian Grimberg           }
338fcbe8c06SSebastian Grimberg         }
339fcbe8c06SSebastian Grimberg       }
3400c73c039SSebastian Grimberg     } break;
3410c73c039SSebastian Grimberg     case CEED_RESTRICTION_ORIENTED: {
342fcbe8c06SSebastian Grimberg       // Restriction with orientations
343fcbe8c06SSebastian Grimberg       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
344fcbe8c06SSebastian Grimberg         for (CeedInt k = 0; k < num_comp; k++) {
345fcbe8c06SSebastian Grimberg           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
346fcbe8c06SSebastian Grimberg             // Iteration bound set to discard padding elements
347fcbe8c06SSebastian Grimberg             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
348f30b1135SSebastian Grimberg               vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
349*7c1dbaffSSebastian Grimberg                   uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0);
350fcbe8c06SSebastian Grimberg             }
351fcbe8c06SSebastian Grimberg           }
352fcbe8c06SSebastian Grimberg         }
353fcbe8c06SSebastian Grimberg       }
3540c73c039SSebastian Grimberg     } break;
3550c73c039SSebastian Grimberg     case CEED_RESTRICTION_CURL_ORIENTED: {
35677d1c127SSebastian Grimberg       // Restriction with tridiagonal transformation
357fcbe8c06SSebastian Grimberg       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
358fcbe8c06SSebastian Grimberg         for (CeedInt k = 0; k < num_comp; k++) {
359fcbe8c06SSebastian Grimberg           // Iteration bound set to discard padding elements
3600c73c039SSebastian Grimberg           CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0;
3610c73c039SSebastian Grimberg           for (CeedInt j = 0; j < blk_end; j++) {
3620c73c039SSebastian Grimberg             vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
3630c73c039SSebastian Grimberg                 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
364*7c1dbaffSSebastian Grimberg                     impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] +
3650c73c039SSebastian Grimberg                 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] *
366*7c1dbaffSSebastian Grimberg                     impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size];
3670c73c039SSebastian Grimberg           }
3680c73c039SSebastian Grimberg           for (n = 1; n < elem_size - 1; n++) {
3690c73c039SSebastian Grimberg             for (CeedInt j = 0; j < blk_end; j++) {
3700c73c039SSebastian Grimberg               vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
3710c73c039SSebastian Grimberg                   uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] *
372*7c1dbaffSSebastian Grimberg                       impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] +
3730c73c039SSebastian Grimberg                   uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
374*7c1dbaffSSebastian Grimberg                       impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] +
3750c73c039SSebastian Grimberg                   uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] *
376*7c1dbaffSSebastian Grimberg                       impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size];
3770c73c039SSebastian Grimberg             }
3780c73c039SSebastian Grimberg           }
3790c73c039SSebastian Grimberg           for (CeedInt j = 0; j < blk_end; j++) {
3800c73c039SSebastian Grimberg             vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
3810c73c039SSebastian Grimberg                 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] *
382*7c1dbaffSSebastian Grimberg                     impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] +
3830c73c039SSebastian Grimberg                 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
384*7c1dbaffSSebastian Grimberg                     impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size];
38521617c04Sjeremylt           }
386b435c5a6Srezgarshakeri         }
3872b730f8bSJeremy L Thompson       }
3880c73c039SSebastian Grimberg     } break;
3892b730f8bSJeremy L Thompson   }
3902b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
3912b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
3922b730f8bSJeremy L Thompson   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
393e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
39421617c04Sjeremylt }
39521617c04Sjeremylt 
396f10650afSjeremylt //------------------------------------------------------------------------------
397*7c1dbaffSSebastian Grimberg // Core Unsigned ElemRestriction Apply Transpose Code
398*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
399*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
400*7c1dbaffSSebastian Grimberg                                                                      const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u,
401*7c1dbaffSSebastian Grimberg                                                                      CeedVector v, CeedRequest *request) {
402*7c1dbaffSSebastian Grimberg   Ceed ceed;
403*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
404*7c1dbaffSSebastian Grimberg   CeedElemRestriction_Ref *impl;
405*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
406*7c1dbaffSSebastian Grimberg   const CeedScalar *uu;
407*7c1dbaffSSebastian Grimberg   CeedScalar       *vv;
408*7c1dbaffSSebastian Grimberg   CeedInt           num_elem, elem_size, v_offset;
409*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
410*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
411*7c1dbaffSSebastian Grimberg   v_offset = start * blk_size * elem_size * num_comp;
412*7c1dbaffSSebastian Grimberg   CeedRestrictionType rstr_type;
413*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
414*7c1dbaffSSebastian Grimberg 
415*7c1dbaffSSebastian Grimberg   // Restriction from E-vector to L-vector
416*7c1dbaffSSebastian Grimberg   // Performing v += r^T * u
417*7c1dbaffSSebastian Grimberg   // uu has shape [elem_size, num_comp, num_elem], row-major
418*7c1dbaffSSebastian Grimberg   // vv has shape [nnodes, num_comp]
419*7c1dbaffSSebastian Grimberg   // Sum into for transpose mode
420*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
421*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
422*7c1dbaffSSebastian Grimberg   switch (rstr_type) {
423*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_ORIENTED: {
424*7c1dbaffSSebastian Grimberg       // Restriction with orientations
425*7c1dbaffSSebastian Grimberg       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
426*7c1dbaffSSebastian Grimberg         for (CeedInt k = 0; k < num_comp; k++) {
427*7c1dbaffSSebastian Grimberg           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
428*7c1dbaffSSebastian Grimberg             // Iteration bound set to discard padding elements
429*7c1dbaffSSebastian Grimberg             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
430*7c1dbaffSSebastian Grimberg               vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset];
431*7c1dbaffSSebastian Grimberg             }
432*7c1dbaffSSebastian Grimberg           }
433*7c1dbaffSSebastian Grimberg         }
434*7c1dbaffSSebastian Grimberg       }
435*7c1dbaffSSebastian Grimberg     } break;
436*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_CURL_ORIENTED: {
437*7c1dbaffSSebastian Grimberg       // Restriction with tridiagonal transformation
438*7c1dbaffSSebastian Grimberg       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
439*7c1dbaffSSebastian Grimberg         for (CeedInt k = 0; k < num_comp; k++) {
440*7c1dbaffSSebastian Grimberg           // Iteration bound set to discard padding elements
441*7c1dbaffSSebastian Grimberg           CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0;
442*7c1dbaffSSebastian Grimberg           for (CeedInt j = 0; j < blk_end; j++) {
443*7c1dbaffSSebastian Grimberg             vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
444*7c1dbaffSSebastian Grimberg                 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
445*7c1dbaffSSebastian Grimberg                     abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) +
446*7c1dbaffSSebastian Grimberg                 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] *
447*7c1dbaffSSebastian Grimberg                     abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]);
448*7c1dbaffSSebastian Grimberg           }
449*7c1dbaffSSebastian Grimberg           for (n = 1; n < elem_size - 1; n++) {
450*7c1dbaffSSebastian Grimberg             for (CeedInt j = 0; j < blk_end; j++) {
451*7c1dbaffSSebastian Grimberg               vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
452*7c1dbaffSSebastian Grimberg                   uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] *
453*7c1dbaffSSebastian Grimberg                       abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) +
454*7c1dbaffSSebastian Grimberg                   uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
455*7c1dbaffSSebastian Grimberg                       abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) +
456*7c1dbaffSSebastian Grimberg                   uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] *
457*7c1dbaffSSebastian Grimberg                       abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]);
458*7c1dbaffSSebastian Grimberg             }
459*7c1dbaffSSebastian Grimberg           }
460*7c1dbaffSSebastian Grimberg           for (CeedInt j = 0; j < blk_end; j++) {
461*7c1dbaffSSebastian Grimberg             vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] +=
462*7c1dbaffSSebastian Grimberg                 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] *
463*7c1dbaffSSebastian Grimberg                     abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) +
464*7c1dbaffSSebastian Grimberg                 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] *
465*7c1dbaffSSebastian Grimberg                     abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]);
466*7c1dbaffSSebastian Grimberg           }
467*7c1dbaffSSebastian Grimberg         }
468*7c1dbaffSSebastian Grimberg       }
469*7c1dbaffSSebastian Grimberg     } break;
470*7c1dbaffSSebastian Grimberg     // LCOV_EXCL_START
471*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_STRIDED:
472*7c1dbaffSSebastian Grimberg       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported");
473*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_DEFAULT:
474*7c1dbaffSSebastian Grimberg       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported");
475*7c1dbaffSSebastian Grimberg       // LCOV_EXCL_STOP
476*7c1dbaffSSebastian Grimberg   }
477*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
478*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
479*7c1dbaffSSebastian Grimberg   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
480*7c1dbaffSSebastian Grimberg   return CEED_ERROR_SUCCESS;
481*7c1dbaffSSebastian Grimberg }
482*7c1dbaffSSebastian Grimberg 
483*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
484*7c1dbaffSSebastian Grimberg // Core Unoriented ElemRestriction Apply Transpose Code
485*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
486*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApplyUnorientedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size,
487*7c1dbaffSSebastian Grimberg                                                                        const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u,
488*7c1dbaffSSebastian Grimberg                                                                        CeedVector v, CeedRequest *request) {
489*7c1dbaffSSebastian Grimberg   Ceed ceed;
490*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
491*7c1dbaffSSebastian Grimberg   CeedElemRestriction_Ref *impl;
492*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
493*7c1dbaffSSebastian Grimberg   const CeedScalar *uu;
494*7c1dbaffSSebastian Grimberg   CeedScalar       *vv;
495*7c1dbaffSSebastian Grimberg   CeedInt           num_elem, elem_size, v_offset;
496*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
497*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
498*7c1dbaffSSebastian Grimberg   v_offset = start * blk_size * elem_size * num_comp;
499*7c1dbaffSSebastian Grimberg   CeedRestrictionType rstr_type;
500*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
501*7c1dbaffSSebastian Grimberg 
502*7c1dbaffSSebastian Grimberg   // Restriction from E-vector to L-vector
503*7c1dbaffSSebastian Grimberg   // Performing v += r^T * u
504*7c1dbaffSSebastian Grimberg   // uu has shape [elem_size, num_comp, num_elem], row-major
505*7c1dbaffSSebastian Grimberg   // vv has shape [nnodes, num_comp]
506*7c1dbaffSSebastian Grimberg   // Sum into for transpose mode
507*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
508*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
509*7c1dbaffSSebastian Grimberg   switch (rstr_type) {
510*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_CURL_ORIENTED: {
511*7c1dbaffSSebastian Grimberg       // Restriction with orientations
512*7c1dbaffSSebastian Grimberg       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
513*7c1dbaffSSebastian Grimberg         for (CeedInt k = 0; k < num_comp; k++) {
514*7c1dbaffSSebastian Grimberg           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
515*7c1dbaffSSebastian Grimberg             // Iteration bound set to discard padding elements
516*7c1dbaffSSebastian Grimberg             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
517*7c1dbaffSSebastian Grimberg               vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset];
518*7c1dbaffSSebastian Grimberg             }
519*7c1dbaffSSebastian Grimberg           }
520*7c1dbaffSSebastian Grimberg         }
521*7c1dbaffSSebastian Grimberg       }
522*7c1dbaffSSebastian Grimberg     } break;
523*7c1dbaffSSebastian Grimberg     // LCOV_EXCL_START
524*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_STRIDED:
525*7c1dbaffSSebastian Grimberg       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported");
526*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_DEFAULT:
527*7c1dbaffSSebastian Grimberg       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported");
528*7c1dbaffSSebastian Grimberg     case CEED_RESTRICTION_ORIENTED:
529*7c1dbaffSSebastian Grimberg       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_ORIENTED not supported");
530*7c1dbaffSSebastian Grimberg       // LCOV_EXCL_STOP
531*7c1dbaffSSebastian Grimberg   }
532*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
533*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
534*7c1dbaffSSebastian Grimberg   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
535*7c1dbaffSSebastian Grimberg   return CEED_ERROR_SUCCESS;
536*7c1dbaffSSebastian Grimberg }
537*7c1dbaffSSebastian Grimberg 
538*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
539f10650afSjeremylt // ElemRestriction Apply - Common Sizes
540f10650afSjeremylt //------------------------------------------------------------------------------
541*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
542*7c1dbaffSSebastian Grimberg                                                     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients,
543*7c1dbaffSSebastian Grimberg                                                     CeedVector u, CeedVector v, CeedRequest *request) {
544*7c1dbaffSSebastian Grimberg   CeedRestrictionType rstr_type;
545*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
5460c73c039SSebastian Grimberg   if (t_mode == CEED_TRANSPOSE) {
547*7c1dbaffSSebastian Grimberg     switch (rstr_type) {
548*7c1dbaffSSebastian Grimberg       case CEED_RESTRICTION_STRIDED:
549*7c1dbaffSSebastian Grimberg       case CEED_RESTRICTION_DEFAULT:
550*7c1dbaffSSebastian Grimberg         return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
551*7c1dbaffSSebastian Grimberg       case CEED_RESTRICTION_ORIENTED:
552*7c1dbaffSSebastian Grimberg         if (use_signs) {
553*7c1dbaffSSebastian Grimberg           return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
5540c73c039SSebastian Grimberg         } else {
555*7c1dbaffSSebastian Grimberg           return CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
5560c73c039SSebastian Grimberg         }
557*7c1dbaffSSebastian Grimberg       case CEED_RESTRICTION_CURL_ORIENTED:
558*7c1dbaffSSebastian Grimberg         if (use_signs && use_orients) {
559*7c1dbaffSSebastian Grimberg           return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
560*7c1dbaffSSebastian Grimberg         } else if (use_orients) {
561*7c1dbaffSSebastian Grimberg           return CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
562*7c1dbaffSSebastian Grimberg         } else {
563*7c1dbaffSSebastian Grimberg           return CeedElemRestrictionApplyUnorientedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
564*7c1dbaffSSebastian Grimberg         }
565*7c1dbaffSSebastian Grimberg     }
566*7c1dbaffSSebastian Grimberg   } else {
567*7c1dbaffSSebastian Grimberg     switch (rstr_type) {
568*7c1dbaffSSebastian Grimberg       case CEED_RESTRICTION_STRIDED:
569*7c1dbaffSSebastian Grimberg       case CEED_RESTRICTION_DEFAULT:
570*7c1dbaffSSebastian Grimberg         return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
571*7c1dbaffSSebastian Grimberg       case CEED_RESTRICTION_ORIENTED:
572*7c1dbaffSSebastian Grimberg         if (use_signs) {
573*7c1dbaffSSebastian Grimberg           return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
574*7c1dbaffSSebastian Grimberg         } else {
575*7c1dbaffSSebastian Grimberg           return CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
576*7c1dbaffSSebastian Grimberg         }
577*7c1dbaffSSebastian Grimberg       case CEED_RESTRICTION_CURL_ORIENTED:
578*7c1dbaffSSebastian Grimberg         if (use_signs && use_orients) {
579*7c1dbaffSSebastian Grimberg           return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
580*7c1dbaffSSebastian Grimberg         } else if (use_orients) {
581*7c1dbaffSSebastian Grimberg           return CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
582*7c1dbaffSSebastian Grimberg         } else {
583*7c1dbaffSSebastian Grimberg           return CeedElemRestrictionApplyUnorientedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request);
584*7c1dbaffSSebastian Grimberg         }
585*7c1dbaffSSebastian Grimberg     }
586*7c1dbaffSSebastian Grimberg   }
587*7c1dbaffSSebastian Grimberg }
588*7c1dbaffSSebastian Grimberg 
589*7c1dbaffSSebastian Grimberg static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
590*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
591*7c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
592*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
593d979a051Sjeremylt }
594d979a051Sjeremylt 
5952b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
596*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
597*7c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
598*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
5994d2a38eeSjeremylt }
6004d2a38eeSjeremylt 
6012b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
602*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
603*7c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
604*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
6059c36149bSjeremylt }
6069c36149bSjeremylt 
6072b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
608*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
609*7c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
610*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
6119c36149bSjeremylt }
6129c36149bSjeremylt 
6132b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
614*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
615*7c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
616*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
617d979a051Sjeremylt }
618d979a051Sjeremylt 
6192b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
620*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
621*7c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
622*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
623d979a051Sjeremylt }
624d979a051Sjeremylt 
6252b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
626*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
627*7c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
628*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
629d979a051Sjeremylt }
630d979a051Sjeremylt 
6312b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
632*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
633*7c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
634*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
635d979a051Sjeremylt }
636d979a051Sjeremylt 
637bf4d1581Sjeremylt // LCOV_EXCL_START
6382b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
639*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
640*7c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
641*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
642d979a051Sjeremylt }
643bf4d1581Sjeremylt // LCOV_EXCL_STOP
644d979a051Sjeremylt 
6452b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
646*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
647*7c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
648*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
649d979a051Sjeremylt }
650d979a051Sjeremylt 
651bf4d1581Sjeremylt // LCOV_EXCL_START
6522b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
653*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
654*7c1dbaffSSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
655*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request);
656d979a051Sjeremylt }
657bf4d1581Sjeremylt // LCOV_EXCL_STOP
658d979a051Sjeremylt 
6592b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
660*7c1dbaffSSebastian Grimberg                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u,
6610c73c039SSebastian Grimberg                                             CeedVector v, CeedRequest *request) {
662*7c1dbaffSSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request);
6634d2a38eeSjeremylt }
6644d2a38eeSjeremylt 
665f10650afSjeremylt //------------------------------------------------------------------------------
666f10650afSjeremylt // ElemRestriction Apply
667f10650afSjeremylt //------------------------------------------------------------------------------
6682b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
669d1d35e2fSjeremylt   CeedInt num_blk, blk_size, num_comp, comp_stride;
6702b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
6712b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
6722b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
6732b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
6747509a596Sjeremylt   CeedElemRestriction_Ref *impl;
6752b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
6764d2a38eeSjeremylt 
677*7c1dbaffSSebastian Grimberg   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, true, true, u, v, request);
678f30b1135SSebastian Grimberg }
679f30b1135SSebastian Grimberg 
680f30b1135SSebastian Grimberg //------------------------------------------------------------------------------
681f30b1135SSebastian Grimberg // ElemRestriction Apply Unsigned
682f30b1135SSebastian Grimberg //------------------------------------------------------------------------------
683f30b1135SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
684f30b1135SSebastian Grimberg   CeedInt num_blk, blk_size, num_comp, comp_stride;
685f30b1135SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
686f30b1135SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
687f30b1135SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
688f30b1135SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
689f30b1135SSebastian Grimberg   CeedElemRestriction_Ref *impl;
690f30b1135SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
691f30b1135SSebastian Grimberg 
692*7c1dbaffSSebastian Grimberg   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, true, u, v, request);
693*7c1dbaffSSebastian Grimberg }
694*7c1dbaffSSebastian Grimberg 
695*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
696*7c1dbaffSSebastian Grimberg // ElemRestriction Apply Unoriented
697*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------
698*7c1dbaffSSebastian Grimberg static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
699*7c1dbaffSSebastian Grimberg   CeedInt num_blk, blk_size, num_comp, comp_stride;
700*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
701*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
702*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
703*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
704*7c1dbaffSSebastian Grimberg   CeedElemRestriction_Ref *impl;
705*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
706*7c1dbaffSSebastian Grimberg 
707*7c1dbaffSSebastian Grimberg   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, false, u, v, request);
7089c36149bSjeremylt }
709be9261b7Sjeremylt 
710f10650afSjeremylt //------------------------------------------------------------------------------
711f10650afSjeremylt // ElemRestriction Apply Block
712f10650afSjeremylt //------------------------------------------------------------------------------
7132b730f8bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
714074cb416Sjeremylt                                              CeedRequest *request) {
715d1d35e2fSjeremylt   CeedInt blk_size, num_comp, comp_stride;
7162b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
7172b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
7182b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
7197509a596Sjeremylt   CeedElemRestriction_Ref *impl;
7202b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
7214d2a38eeSjeremylt 
722*7c1dbaffSSebastian Grimberg   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request);
7239c36149bSjeremylt }
724be9261b7Sjeremylt 
725f10650afSjeremylt //------------------------------------------------------------------------------
726bd33150aSjeremylt // ElemRestriction Get Offsets
727bd33150aSjeremylt //------------------------------------------------------------------------------
7282b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
729bd33150aSjeremylt   CeedElemRestriction_Ref *impl;
7302b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
731bd33150aSjeremylt   Ceed ceed;
7322b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
733bd33150aSjeremylt 
7346574a04fSJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
735bd33150aSjeremylt 
736bd33150aSjeremylt   *offsets = impl->offsets;
737e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
738bd33150aSjeremylt }
739bd33150aSjeremylt 
740bd33150aSjeremylt //------------------------------------------------------------------------------
74177d1c127SSebastian Grimberg // ElemRestriction Get Orientations
74277d1c127SSebastian Grimberg //------------------------------------------------------------------------------
74377d1c127SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
74477d1c127SSebastian Grimberg   CeedElemRestriction_Ref *impl;
74577d1c127SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
74677d1c127SSebastian Grimberg   Ceed ceed;
74777d1c127SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
74877d1c127SSebastian Grimberg 
749fcbe8c06SSebastian Grimberg   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
75077d1c127SSebastian Grimberg 
75177d1c127SSebastian Grimberg   *orients = impl->orients;
75277d1c127SSebastian Grimberg   return CEED_ERROR_SUCCESS;
75377d1c127SSebastian Grimberg }
75477d1c127SSebastian Grimberg 
75577d1c127SSebastian Grimberg //------------------------------------------------------------------------------
75677d1c127SSebastian Grimberg // ElemRestriction Get Curl-Conforming Orientations
75777d1c127SSebastian Grimberg //------------------------------------------------------------------------------
7580c73c039SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) {
75977d1c127SSebastian Grimberg   CeedElemRestriction_Ref *impl;
76077d1c127SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
76177d1c127SSebastian Grimberg   Ceed ceed;
76277d1c127SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
76377d1c127SSebastian Grimberg 
764fcbe8c06SSebastian Grimberg   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
76577d1c127SSebastian Grimberg 
76677d1c127SSebastian Grimberg   *curl_orients = impl->curl_orients;
76777d1c127SSebastian Grimberg   return CEED_ERROR_SUCCESS;
76877d1c127SSebastian Grimberg }
76977d1c127SSebastian Grimberg 
77077d1c127SSebastian Grimberg //------------------------------------------------------------------------------
771f10650afSjeremylt // ElemRestriction Destroy
772f10650afSjeremylt //------------------------------------------------------------------------------
77321617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
774fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
7752b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
77621617c04Sjeremylt 
7772b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->offsets_allocated));
77877d1c127SSebastian Grimberg   CeedCallBackend(CeedFree(&impl->orients_allocated));
77977d1c127SSebastian Grimberg   CeedCallBackend(CeedFree(&impl->curl_orients_allocated));
7802b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
781e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
78221617c04Sjeremylt }
78321617c04Sjeremylt 
784f10650afSjeremylt //------------------------------------------------------------------------------
785f10650afSjeremylt // ElemRestriction Create
786f10650afSjeremylt //------------------------------------------------------------------------------
787fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
7880c73c039SSebastian Grimberg                                   const CeedInt8 *curl_orients, CeedElemRestriction r) {
78921617c04Sjeremylt   CeedElemRestriction_Ref *impl;
790d1d35e2fSjeremylt   CeedInt                  num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
7912b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
7922b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
7932b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
7942b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
7952b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
7962b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
7974ce2993fSjeremylt   Ceed ceed;
7982b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
79921617c04Sjeremylt 
8006574a04fSJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
8012b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &impl));
8023661185eSjeremylt 
80392fe105eSJeremy L Thompson   // Offsets data
804fcbe8c06SSebastian Grimberg   CeedRestrictionType rstr_type;
805fcbe8c06SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type));
806fcbe8c06SSebastian Grimberg   if (rstr_type != CEED_RESTRICTION_STRIDED) {
80792fe105eSJeremy L Thompson     // Check indices for ref or memcheck backends
808d1d35e2fSjeremylt     Ceed parent_ceed = ceed, curr_ceed = NULL;
809d1d35e2fSjeremylt     while (parent_ceed != curr_ceed) {
810d1d35e2fSjeremylt       curr_ceed = parent_ceed;
8112b730f8bSJeremy L Thompson       CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed));
8123661185eSjeremylt     }
8133661185eSjeremylt     const char *resource;
8142b730f8bSJeremy L Thompson     CeedCallBackend(CeedGetResource(parent_ceed, &resource));
8152b730f8bSJeremy L Thompson     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") ||
816d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
817e79b91d9SJeremy L Thompson       CeedSize l_size;
8182b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size));
8193661185eSjeremylt 
8202b730f8bSJeremy L Thompson       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
8216574a04fSJeremy L Thompson         CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
8226574a04fSJeremy L Thompson                   "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
8232b730f8bSJeremy L Thompson       }
8242b730f8bSJeremy L Thompson     }
8253661185eSjeremylt 
82692fe105eSJeremy L Thompson     // Copy data
827d1d35e2fSjeremylt     switch (copy_mode) {
82821617c04Sjeremylt       case CEED_COPY_VALUES:
8292b730f8bSJeremy L Thompson         CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated));
8302b730f8bSJeremy L Thompson         memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0]));
831d979a051Sjeremylt         impl->offsets = impl->offsets_allocated;
83221617c04Sjeremylt         break;
83321617c04Sjeremylt       case CEED_OWN_POINTER:
834d979a051Sjeremylt         impl->offsets_allocated = (CeedInt *)offsets;
835d979a051Sjeremylt         impl->offsets           = impl->offsets_allocated;
83621617c04Sjeremylt         break;
83721617c04Sjeremylt       case CEED_USE_POINTER:
838d979a051Sjeremylt         impl->offsets = offsets;
83921617c04Sjeremylt     }
840fcbe8c06SSebastian Grimberg 
841fcbe8c06SSebastian Grimberg     // Orientation data
842fcbe8c06SSebastian Grimberg     if (rstr_type == CEED_RESTRICTION_ORIENTED) {
8430305e208SSebastian Grimberg       CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction");
844fcbe8c06SSebastian Grimberg       switch (copy_mode) {
845fcbe8c06SSebastian Grimberg         case CEED_COPY_VALUES:
846fcbe8c06SSebastian Grimberg           CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated));
847fcbe8c06SSebastian Grimberg           memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0]));
848fcbe8c06SSebastian Grimberg           impl->orients = impl->orients_allocated;
849fcbe8c06SSebastian Grimberg           break;
850fcbe8c06SSebastian Grimberg         case CEED_OWN_POINTER:
851fcbe8c06SSebastian Grimberg           impl->orients_allocated = (bool *)orients;
852fcbe8c06SSebastian Grimberg           impl->orients           = impl->orients_allocated;
853fcbe8c06SSebastian Grimberg           break;
854fcbe8c06SSebastian Grimberg         case CEED_USE_POINTER:
855fcbe8c06SSebastian Grimberg           impl->orients = orients;
856fcbe8c06SSebastian Grimberg       }
857fcbe8c06SSebastian Grimberg     } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
8580305e208SSebastian Grimberg       CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction");
859fcbe8c06SSebastian Grimberg       switch (copy_mode) {
860fcbe8c06SSebastian Grimberg         case CEED_COPY_VALUES:
861fcbe8c06SSebastian Grimberg           CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated));
862fcbe8c06SSebastian Grimberg           memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0]));
863fcbe8c06SSebastian Grimberg           impl->curl_orients = impl->curl_orients_allocated;
864fcbe8c06SSebastian Grimberg           break;
865fcbe8c06SSebastian Grimberg         case CEED_OWN_POINTER:
8660c73c039SSebastian Grimberg           impl->curl_orients_allocated = (CeedInt8 *)curl_orients;
867fcbe8c06SSebastian Grimberg           impl->curl_orients           = impl->curl_orients_allocated;
868fcbe8c06SSebastian Grimberg           break;
869fcbe8c06SSebastian Grimberg         case CEED_USE_POINTER:
870fcbe8c06SSebastian Grimberg           impl->curl_orients = curl_orients;
871fcbe8c06SSebastian Grimberg       }
872fcbe8c06SSebastian Grimberg     }
87392fe105eSJeremy L Thompson   }
874fe2413ffSjeremylt 
8752b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetData(r, impl));
876d1d35e2fSjeremylt   CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
8772b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetELayout(r, layout));
8782b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref));
879f30b1135SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref));
880*7c1dbaffSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref));
8812b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
8822b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
88377d1c127SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref));
88477d1c127SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref));
8852b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref));
886d979a051Sjeremylt 
887d1d35e2fSjeremylt   // Set apply function based upon num_comp, blk_size, and comp_stride
888d979a051Sjeremylt   CeedInt idx = -1;
8892b730f8bSJeremy L Thompson   if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1);
890d979a051Sjeremylt   switch (idx) {
891d979a051Sjeremylt     case 110:
892d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_110;
893d979a051Sjeremylt       break;
894d979a051Sjeremylt     case 111:
895d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_111;
896d979a051Sjeremylt       break;
897d979a051Sjeremylt     case 180:
898d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_180;
899d979a051Sjeremylt       break;
900d979a051Sjeremylt     case 181:
901d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_181;
902d979a051Sjeremylt       break;
903d979a051Sjeremylt     case 310:
904d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_310;
905d979a051Sjeremylt       break;
906d979a051Sjeremylt     case 311:
907d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_311;
908d979a051Sjeremylt       break;
909d979a051Sjeremylt     case 380:
910d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_380;
911d979a051Sjeremylt       break;
912d979a051Sjeremylt     case 381:
913d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_381;
914d979a051Sjeremylt       break;
915bf4d1581Sjeremylt     // LCOV_EXCL_START
916d979a051Sjeremylt     case 510:
917d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_510;
918d979a051Sjeremylt       break;
919bf4d1581Sjeremylt     // LCOV_EXCL_STOP
920d979a051Sjeremylt     case 511:
921d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_511;
922d979a051Sjeremylt       break;
923bf4d1581Sjeremylt     // LCOV_EXCL_START
924d979a051Sjeremylt     case 580:
925d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_580;
926d979a051Sjeremylt       break;
927bf4d1581Sjeremylt     // LCOV_EXCL_STOP
928d979a051Sjeremylt     case 581:
929d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_581;
930d979a051Sjeremylt       break;
931d979a051Sjeremylt     default:
932d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_Core;
933d979a051Sjeremylt       break;
934d979a051Sjeremylt   }
935d979a051Sjeremylt 
936e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
93721617c04Sjeremylt }
938fc0567d9Srezgarshakeri 
939fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
940