xref: /libCEED/backends/memcheck/ceed-memcheck-restriction.c (revision d4cc18453651bd0f94c1a2e078b2646a92dafdcc)
1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
29e82028bSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
39e82028bSJeremy L Thompson //
49e82028bSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
59e82028bSJeremy L Thompson //
69e82028bSJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
79e82028bSJeremy L Thompson 
89e82028bSJeremy L Thompson #include <ceed.h>
99e82028bSJeremy L Thompson #include <ceed/backend.h>
109e82028bSJeremy L Thompson #include <stdbool.h>
119e82028bSJeremy L Thompson #include <stdlib.h>
129e82028bSJeremy L Thompson #include <string.h>
139e82028bSJeremy L Thompson 
149e82028bSJeremy L Thompson #include "ceed-memcheck.h"
159e82028bSJeremy L Thompson 
169e82028bSJeremy L Thompson //------------------------------------------------------------------------------
179e82028bSJeremy L Thompson // Set backend strides
189e82028bSJeremy L Thompson //------------------------------------------------------------------------------
// Fill `strides` with the layout the Memcheck backend uses when a strided
// restriction asks for backend-chosen strides.
//   rstr    - restriction queried for its element size and component count
//   strides - output; in the strided apply kernels of this file strides[0]
//             scales the node index, strides[1] the component index and
//             strides[2] the element index
// Errors from the accessor calls are handled by CeedCallBackend; otherwise
// CEED_ERROR_SUCCESS is returned.
static inline int CeedElemRestrictionGetBackendStrides_Memcheck(CeedElemRestriction rstr, CeedInt strides[3]) {
  CeedInt elem_size, num_comp;

  CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
  CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));

  // Memcheck default, contiguous by component, then node
  strides[0] = num_comp;
  strides[1] = 1;
  strides[2] = num_comp * elem_size;

  // For reference, the layouts this one deliberately differs from:
  //   CPU default, contiguous by node, then component:
  //     {1, elem_size, elem_size * num_comp}
  //   GPU default, contiguous by node, then element:
  //     {1, num_elem * elem_size, elem_size}
  return CEED_ERROR_SUCCESS;
}
429e82028bSJeremy L Thompson 
439e82028bSJeremy L Thompson //------------------------------------------------------------------------------
449e82028bSJeremy L Thompson // Core ElemRestriction Apply Code
459e82028bSJeremy L Thompson //------------------------------------------------------------------------------
// Strided restriction, no transpose.
// Copies entries of `uu`, addressed through the restriction strides, into the
// block-interleaved layout of `vv` for the element blocks [start, stop).
//   - strides[0] scales the node index, strides[1] the component index and
//     strides[2] the element index when addressing `uu`
//   - `v_offset` is subtracted from every index into `vv`
//   - lanes of a block that run past the last element re-read element
//     `num_elem - 1` (the element index is clamped with CeedIntMin)
// Errors from the accessor calls are handled by CeedCallBackend; otherwise
// CEED_ERROR_SUCCESS is returned.
static inline int CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
                                                                           CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
                                                                           CeedSize v_offset, const CeedScalar *__restrict__ uu,
                                                                           CeedScalar *__restrict__ vv) {
  bool    use_backend_layout;
  CeedInt layout[3] = {0};

  // Pick either the backend's own strides or the user-provided ones
  CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &use_backend_layout));
  if (use_backend_layout) {
    CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, layout));
  } else {
    CeedCallBackend(CeedElemRestrictionGetStrides(rstr, layout));
  }

  for (CeedSize elem = start * block_size; elem < stop * block_size; elem += block_size) {
    // Start of this block in the output
    const CeedSize v_block = elem * elem_size * num_comp;

    for (CeedSize comp = 0; comp < num_comp; comp++) {
      for (CeedSize node = 0; node < elem_size; node++) {
        const CeedSize v_row  = v_block + (comp * elem_size + node) * block_size - v_offset;
        const CeedSize u_base = node * layout[0] + comp * layout[1];

        CeedPragmaSIMD for (CeedSize lane = 0; lane < block_size; lane++) {
          // Clamp so padding lanes read the last real element
          const CeedSize src_elem = CeedIntMin(elem + lane, num_elem - 1);

          vv[v_row + lane] = uu[u_base + src_elem * (CeedSize)layout[2]];
        }
      }
    }
  }
  return CEED_ERROR_SUCCESS;
}
719e82028bSJeremy L Thompson 
// Offset-based restriction, no transpose.
// For each element block in [start, stop) and each component, reads
// `uu[offsets[...] + comp * comp_stride]` and stores it contiguously in `vv`.
//   - `impl->offsets` is indexed by position within the block plus
//     `elem * elem_size`
//   - `v_offset` is subtracted from every index into `vv`
//   - `num_elem` is not used by this kernel
// Errors from CeedElemRestrictionGetData are handled by CeedCallBackend;
// otherwise CEED_ERROR_SUCCESS is returned.
static inline int CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
                                                                          const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
                                                                          CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
                                                                          CeedScalar *__restrict__ vv) {
  CeedElemRestriction_Memcheck *impl;

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));

  // Number of entries handled per (block, component) pair
  const CeedSize block_len = elem_size * block_size;

  for (CeedSize elem = start * block_size; elem < stop * block_size; elem += block_size) {
    const CeedSize offsets_base = elem * elem_size;

    for (CeedSize comp = 0; comp < num_comp; comp++) {
      const CeedSize v_base  = elem_size * (comp * block_size + elem * num_comp) - v_offset;
      const CeedSize u_shift = comp * comp_stride;

      CeedPragmaSIMD for (CeedSize i = 0; i < block_len; i++) {
        vv[v_base + i] = uu[impl->offsets[offsets_base + i] + u_shift];
      }
    }
  }
  return CEED_ERROR_SUCCESS;
}
899e82028bSJeremy L Thompson 
// Offset-based restriction with orientations, no transpose.
// Same gather as the plain offset kernel, but each value is multiplied by
// -1.0 when the matching `impl->orients` entry is nonzero and by 1.0
// otherwise.
//   - `impl->offsets` and `impl->orients` share the same index
//   - `v_offset` is subtracted from every index into `vv`
//   - `num_elem` is not used by this kernel
// Errors from CeedElemRestrictionGetData are handled by CeedCallBackend;
// otherwise CEED_ERROR_SUCCESS is returned.
static inline int CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp,
                                                                            const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
                                                                            CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset,
                                                                            const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
  CeedElemRestriction_Memcheck *impl;

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));

  // Number of entries handled per (block, component) pair
  const CeedSize block_len = elem_size * block_size;

  for (CeedSize elem = start * block_size; elem < stop * block_size; elem += block_size) {
    const CeedSize index_base = elem * elem_size;

    for (CeedSize comp = 0; comp < num_comp; comp++) {
      const CeedSize v_base  = elem_size * (comp * block_size + elem * num_comp) - v_offset;
      const CeedSize u_shift = comp * comp_stride;

      CeedPragmaSIMD for (CeedSize i = 0; i < block_len; i++) {
        const double sign = impl->orients[index_base + i] ? -1.0 : 1.0;

        vv[v_base + i] = uu[impl->offsets[index_base + i] + u_shift] * sign;
      }
    }
  }
  return CEED_ERROR_SUCCESS;
}
1089e82028bSJeremy L Thompson 
// Offset-based restriction with a tridiagonal (curl-orientation)
// transformation, no transpose.
// Each output node is a weighted sum of the gathered values at the previous,
// current and next node of the same element, with weights taken from
// `impl->curl_orients`.
//   - `impl->curl_orients` is addressed as three rows per node:
//     row 3*n + 0 weights node n - 1, row 3*n + 1 weights node n and
//     row 3*n + 2 weights node n + 1
//   - the first node has no previous neighbor and the last node has no next
//     neighbor, so those rows use two terms only
//   - `v_offset` is subtracted from every index into `vv`
//   - `num_elem` is not used by this kernel
// NOTE(review): the first row always reads node 1, so this presumably
// expects elem_size >= 2 -- confirm against callers.
// Errors from CeedElemRestrictionGetData are handled by CeedCallBackend;
// otherwise CEED_ERROR_SUCCESS is returned.
static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp,
                                                                                const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
                                                                                CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset,
                                                                                const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
  CeedElemRestriction_Memcheck *impl;

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
  for (CeedSize elem = start * block_size; elem < stop * block_size; elem += block_size) {
    const CeedSize offsets_base = elem * elem_size;
    const CeedSize orients_base = elem * 3 * elem_size;
    const CeedSize v_block      = elem * elem_size * num_comp;

    for (CeedSize comp = 0; comp < num_comp; comp++) {
      const CeedSize u_shift = comp * comp_stride;
      CeedSize       node    = 0;

      // First row: diagonal and super-diagonal terms
      {
        const CeedSize v_row  = v_block + (comp * elem_size + node) * block_size - v_offset;
        const CeedSize o_diag = offsets_base + node * block_size;
        const CeedSize o_sup  = offsets_base + (node + 1) * block_size;
        const CeedSize c_diag = orients_base + (3 * node + 1) * block_size;
        const CeedSize c_sup  = orients_base + (3 * node + 2) * block_size;

        CeedPragmaSIMD for (CeedSize lane = 0; lane < block_size; lane++) {
          vv[v_row + lane] = uu[impl->offsets[o_diag + lane] + u_shift] * impl->curl_orients[c_diag + lane] +
                             uu[impl->offsets[o_sup + lane] + u_shift] * impl->curl_orients[c_sup + lane];
        }
      }

      // Interior rows: sub-diagonal, diagonal and super-diagonal terms
      for (node = 1; node < elem_size - 1; node++) {
        const CeedSize v_row  = v_block + (comp * elem_size + node) * block_size - v_offset;
        const CeedSize o_sub  = offsets_base + (node - 1) * block_size;
        const CeedSize o_diag = offsets_base + node * block_size;
        const CeedSize o_sup  = offsets_base + (node + 1) * block_size;
        const CeedSize c_sub  = orients_base + (3 * node + 0) * block_size;
        const CeedSize c_diag = orients_base + (3 * node + 1) * block_size;
        const CeedSize c_sup  = orients_base + (3 * node + 2) * block_size;

        CeedPragmaSIMD for (CeedSize lane = 0; lane < block_size; lane++) {
          vv[v_row + lane] = uu[impl->offsets[o_sub + lane] + u_shift] * impl->curl_orients[c_sub + lane] +
                             uu[impl->offsets[o_diag + lane] + u_shift] * impl->curl_orients[c_diag + lane] +
                             uu[impl->offsets[o_sup + lane] + u_shift] * impl->curl_orients[c_sup + lane];
        }
      }

      // Last row (node keeps the value left by the loop above): sub-diagonal and diagonal terms
      {
        const CeedSize v_row  = v_block + (comp * elem_size + node) * block_size - v_offset;
        const CeedSize o_sub  = offsets_base + (node - 1) * block_size;
        const CeedSize o_diag = offsets_base + node * block_size;
        const CeedSize c_sub  = orients_base + (3 * node + 0) * block_size;
        const CeedSize c_diag = orients_base + (3 * node + 1) * block_size;

        CeedPragmaSIMD for (CeedSize lane = 0; lane < block_size; lane++) {
          vv[v_row + lane] = uu[impl->offsets[o_sub + lane] + u_shift] * impl->curl_orients[c_sub + lane] +
                             uu[impl->offsets[o_diag + lane] + u_shift] * impl->curl_orients[c_diag + lane];
        }
      }
    }
  }
  return CEED_ERROR_SUCCESS;
}
1509e82028bSJeremy L Thompson 
// Offset-based restriction with an unsigned tridiagonal (curl-orientation)
// transformation, no transpose.
// Identical in structure to the signed curl-oriented kernel, except that
// every weight read from `impl->curl_orients` is passed through abs() first.
//   - `impl->curl_orients` is addressed as three rows per node:
//     row 3*n + 0 weights node n - 1, row 3*n + 1 weights node n and
//     row 3*n + 2 weights node n + 1
//   - the first and last node use two terms only
//   - `v_offset` is subtracted from every index into `vv`
//   - `num_elem` is not used by this kernel
// NOTE(review): the first row always reads node 1, so this presumably
// expects elem_size >= 2 -- confirm against callers.
// Errors from CeedElemRestrictionGetData are handled by CeedCallBackend;
// otherwise CEED_ERROR_SUCCESS is returned.
static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core(
    CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop,
    CeedInt num_elem, CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
  CeedElemRestriction_Memcheck *impl;

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
  for (CeedSize elem = start * block_size; elem < stop * block_size; elem += block_size) {
    const CeedSize offsets_base = elem * elem_size;
    const CeedSize orients_base = elem * 3 * elem_size;
    const CeedSize v_block      = elem * elem_size * num_comp;

    for (CeedSize comp = 0; comp < num_comp; comp++) {
      const CeedSize u_shift = comp * comp_stride;
      CeedSize       node    = 0;

      // First row: diagonal and super-diagonal terms
      {
        const CeedSize v_row  = v_block + (comp * elem_size + node) * block_size - v_offset;
        const CeedSize o_diag = offsets_base + node * block_size;
        const CeedSize o_sup  = offsets_base + (node + 1) * block_size;
        const CeedSize c_diag = orients_base + (3 * node + 1) * block_size;
        const CeedSize c_sup  = orients_base + (3 * node + 2) * block_size;

        CeedPragmaSIMD for (CeedSize lane = 0; lane < block_size; lane++) {
          vv[v_row + lane] = uu[impl->offsets[o_diag + lane] + u_shift] * abs(impl->curl_orients[c_diag + lane]) +
                             uu[impl->offsets[o_sup + lane] + u_shift] * abs(impl->curl_orients[c_sup + lane]);
        }
      }

      // Interior rows: sub-diagonal, diagonal and super-diagonal terms
      for (node = 1; node < elem_size - 1; node++) {
        const CeedSize v_row  = v_block + (comp * elem_size + node) * block_size - v_offset;
        const CeedSize o_sub  = offsets_base + (node - 1) * block_size;
        const CeedSize o_diag = offsets_base + node * block_size;
        const CeedSize o_sup  = offsets_base + (node + 1) * block_size;
        const CeedSize c_sub  = orients_base + (3 * node + 0) * block_size;
        const CeedSize c_diag = orients_base + (3 * node + 1) * block_size;
        const CeedSize c_sup  = orients_base + (3 * node + 2) * block_size;

        CeedPragmaSIMD for (CeedSize lane = 0; lane < block_size; lane++) {
          vv[v_row + lane] = uu[impl->offsets[o_sub + lane] + u_shift] * abs(impl->curl_orients[c_sub + lane]) +
                             uu[impl->offsets[o_diag + lane] + u_shift] * abs(impl->curl_orients[c_diag + lane]) +
                             uu[impl->offsets[o_sup + lane] + u_shift] * abs(impl->curl_orients[c_sup + lane]);
        }
      }

      // Last row (node keeps the value left by the loop above): sub-diagonal and diagonal terms
      {
        const CeedSize v_row  = v_block + (comp * elem_size + node) * block_size - v_offset;
        const CeedSize o_sub  = offsets_base + (node - 1) * block_size;
        const CeedSize o_diag = offsets_base + node * block_size;
        const CeedSize c_sub  = orients_base + (3 * node + 0) * block_size;
        const CeedSize c_diag = orients_base + (3 * node + 1) * block_size;

        CeedPragmaSIMD for (CeedSize lane = 0; lane < block_size; lane++) {
          vv[v_row + lane] = uu[impl->offsets[o_sub + lane] + u_shift] * abs(impl->curl_orients[c_sub + lane]) +
                             uu[impl->offsets[o_diag + lane] + u_shift] * abs(impl->curl_orients[c_diag + lane]);
        }
      }
    }
  }
  return CEED_ERROR_SUCCESS;
}
1919e82028bSJeremy L Thompson 
// Strided restriction, transpose.
// Adds entries of the block-interleaved `uu` into `vv`, addressed through
// the restriction strides, for the element blocks [start, stop).
//   - strides[0] scales the node index, strides[1] the component index and
//     strides[2] the element index when addressing `vv`
//   - `v_offset` is subtracted from every index into `uu`
//   - only the first min(block_size, num_elem - elem) lanes of a block are
//     accumulated, so lanes past the last element are ignored
// Errors from the accessor calls are handled by CeedCallBackend; otherwise
// CEED_ERROR_SUCCESS is returned.
static inline int CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
                                                                         CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size,
                                                                         CeedSize v_offset, const CeedScalar *__restrict__ uu,
                                                                         CeedScalar *__restrict__ vv) {
  bool    use_backend_layout;
  CeedInt layout[3] = {0};

  // Pick either the backend's own strides or the user-provided ones
  CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &use_backend_layout));
  if (use_backend_layout) {
    CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, layout));
  } else {
    CeedCallBackend(CeedElemRestrictionGetStrides(rstr, layout));
  }

  for (CeedSize elem = start * block_size; elem < stop * block_size; elem += block_size) {
    // Lanes holding real elements in this block
    const CeedInt  num_active = CeedIntMin(block_size, num_elem - elem);
    const CeedSize u_block    = elem * elem_size * num_comp;

    for (CeedSize comp = 0; comp < num_comp; comp++) {
      for (CeedSize node = 0; node < elem_size; node++) {
        const CeedSize u_row  = u_block + (comp * elem_size + node) * block_size - v_offset;
        const CeedSize v_base = node * layout[0] + comp * layout[1];

        CeedPragmaSIMD for (CeedSize lane = 0; lane < num_active; lane++) {
          vv[v_base + (elem + lane) * layout[2]] += uu[u_row + lane];
        }
      }
    }
  }
  return CEED_ERROR_SUCCESS;
}
2179e82028bSJeremy L Thompson 
// Offset-based restriction, transpose.
// For each element block in [start, stop) and each component, adds the
// entries of `uu` into `vv[offsets[...] + comp * comp_stride]`.
//   - every accumulation into `vv` is marked with CeedPragmaAtomic
//   - only the first min(block_size, num_elem - elem) lanes of each row are
//     processed, which discards padding elements
//   - `v_offset` is subtracted from every index into `uu`
// Errors from CeedElemRestrictionGetData are handled by CeedCallBackend;
// otherwise CEED_ERROR_SUCCESS is returned.
static inline int CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
                                                                        const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
                                                                        CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
                                                                        CeedScalar *__restrict__ vv) {
  CeedElemRestriction_Memcheck *impl;

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));

  // Number of entries per (block, component) pair, padding lanes included
  const CeedSize block_len = elem_size * block_size;

  for (CeedSize elem = start * block_size; elem < stop * block_size; elem += block_size) {
    // Lanes holding real elements in this block
    const CeedInt  num_active   = CeedIntMin(block_size, num_elem - elem);
    const CeedSize offsets_base = elem * elem_size;

    for (CeedSize comp = 0; comp < num_comp; comp++) {
      const CeedSize u_base  = elem_size * (comp * block_size + elem * num_comp) - v_offset;
      const CeedSize v_shift = comp * comp_stride;

      for (CeedSize row = 0; row < block_len; row += block_size) {
        for (CeedSize lane = 0; lane < num_active; lane++) {
          const CeedSize   pos   = row + lane;
          const CeedScalar value = uu[u_base + pos];

          CeedPragmaAtomic vv[impl->offsets[offsets_base + pos] + v_shift] += value;
        }
      }
    }
  }
  return CEED_ERROR_SUCCESS;
}
2419e82028bSJeremy L Thompson 
// Offset-based restriction with orientations, transpose.
// Same scatter-add as the plain offset transpose kernel, but each value is
// multiplied by -1.0 when the matching `impl->orients` entry is nonzero and
// by 1.0 otherwise before it is accumulated.
//   - every accumulation into `vv` is marked with CeedPragmaAtomic
//   - only the first min(block_size, num_elem - elem) lanes of each row are
//     processed, which discards padding elements
//   - `v_offset` is subtracted from every index into `uu`
// Errors from CeedElemRestrictionGetData are handled by CeedCallBackend;
// otherwise CEED_ERROR_SUCCESS is returned.
static inline int CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
                                                                          const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem,
                                                                          CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu,
                                                                          CeedScalar *__restrict__ vv) {
  CeedElemRestriction_Memcheck *impl;

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));

  // Number of entries per (block, component) pair, padding lanes included
  const CeedSize block_len = elem_size * block_size;

  for (CeedSize elem = start * block_size; elem < stop * block_size; elem += block_size) {
    // Lanes holding real elements in this block
    const CeedInt  num_active = CeedIntMin(block_size, num_elem - elem);
    const CeedSize index_base = elem * elem_size;

    for (CeedSize comp = 0; comp < num_comp; comp++) {
      const CeedSize u_base  = elem_size * (comp * block_size + elem * num_comp) - v_offset;
      const CeedSize v_shift = comp * comp_stride;

      for (CeedSize row = 0; row < block_len; row += block_size) {
        for (CeedSize lane = 0; lane < num_active; lane++) {
          const CeedSize   pos   = row + lane;
          const CeedScalar value = uu[u_base + pos] * (impl->orients[index_base + pos] ? -1.0 : 1.0);

          CeedPragmaAtomic vv[impl->offsets[index_base + pos] + v_shift] += value;
        }
      }
    }
  }
  return CEED_ERROR_SUCCESS;
}
2659e82028bSJeremy L Thompson 
// Offset-based restriction with a tridiagonal (curl-orientation)
// transformation, transpose.
// For each node, forms a weighted sum of the `uu` entries at the previous,
// current and next node of the same element, then adds that sum into
// `vv[offsets[...] + comp * comp_stride]`.
//   - weights come from `impl->curl_orients`; for node n the rows used are
//     3*n - 1 (previous node), 3*n + 1 (current node) and 3*n + 3 (next node)
//   - the first and last node use two terms only
//   - per-lane sums are staged in a block_size-long scratch array before
//     the accumulation, which is marked with CeedPragmaAtomic
//   - only the first min(block_size, num_elem - elem) lanes are processed,
//     which discards padding elements
//   - `v_offset` is subtracted from every index into `uu`
// NOTE(review): the first row always reads node 1, so this presumably
// expects elem_size >= 2 -- confirm against callers.
// Errors from CeedElemRestrictionGetData are handled by CeedCallBackend;
// otherwise CEED_ERROR_SUCCESS is returned.
static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp,
                                                                              const CeedInt block_size, const CeedInt comp_stride, CeedInt start,
                                                                              CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset,
                                                                              const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
  CeedElemRestriction_Memcheck *impl;
  CeedScalar                    row_sum[block_size];

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
  for (CeedSize elem = start * block_size; elem < stop * block_size; elem += block_size) {
    // Lanes holding real elements in this block
    const CeedSize num_active   = CeedIntMin(block_size, num_elem - elem);
    const CeedSize offsets_base = elem * elem_size;
    const CeedSize orients_base = elem * 3 * elem_size;
    const CeedSize u_block      = elem * elem_size * num_comp;

    for (CeedSize comp = 0; comp < num_comp; comp++) {
      const CeedSize v_shift = comp * comp_stride;
      CeedSize       node    = 0;

      // First row: current and next node contribute
      {
        const CeedSize u_diag = u_block + (comp * elem_size + node) * block_size - v_offset;
        const CeedSize u_next = u_block + (comp * elem_size + node + 1) * block_size - v_offset;
        const CeedSize c_diag = orients_base + (3 * node + 1) * block_size;
        const CeedSize c_next = orients_base + (3 * node + 3) * block_size;
        const CeedSize o_row  = offsets_base + node * block_size;

        CeedPragmaSIMD for (CeedSize lane = 0; lane < num_active; lane++) {
          row_sum[lane] = uu[u_diag + lane] * impl->curl_orients[c_diag + lane] + uu[u_next + lane] * impl->curl_orients[c_next + lane];
        }
        for (CeedSize lane = 0; lane < num_active; lane++) {
          CeedPragmaAtomic vv[impl->offsets[o_row + lane] + v_shift] += row_sum[lane];
        }
      }

      // Interior rows: previous, current and next node contribute
      for (node = 1; node < elem_size - 1; node++) {
        const CeedSize u_prev = u_block + (comp * elem_size + node - 1) * block_size - v_offset;
        const CeedSize u_diag = u_block + (comp * elem_size + node) * block_size - v_offset;
        const CeedSize u_next = u_block + (comp * elem_size + node + 1) * block_size - v_offset;
        const CeedSize c_prev = orients_base + (3 * node - 1) * block_size;
        const CeedSize c_diag = orients_base + (3 * node + 1) * block_size;
        const CeedSize c_next = orients_base + (3 * node + 3) * block_size;
        const CeedSize o_row  = offsets_base + node * block_size;

        CeedPragmaSIMD for (CeedSize lane = 0; lane < num_active; lane++) {
          row_sum[lane] = uu[u_prev + lane] * impl->curl_orients[c_prev + lane] + uu[u_diag + lane] * impl->curl_orients[c_diag + lane] +
                          uu[u_next + lane] * impl->curl_orients[c_next + lane];
        }
        for (CeedSize lane = 0; lane < num_active; lane++) {
          CeedPragmaAtomic vv[impl->offsets[o_row + lane] + v_shift] += row_sum[lane];
        }
      }

      // Last row (node keeps the value left by the loop above): previous and current node contribute
      {
        const CeedSize u_prev = u_block + (comp * elem_size + node - 1) * block_size - v_offset;
        const CeedSize u_diag = u_block + (comp * elem_size + node) * block_size - v_offset;
        const CeedSize c_prev = orients_base + (3 * node - 1) * block_size;
        const CeedSize c_diag = orients_base + (3 * node + 1) * block_size;
        const CeedSize o_row  = offsets_base + node * block_size;

        CeedPragmaSIMD for (CeedSize lane = 0; lane < num_active; lane++) {
          row_sum[lane] = uu[u_prev + lane] * impl->curl_orients[c_prev + lane] + uu[u_diag + lane] * impl->curl_orients[c_diag + lane];
        }
        for (CeedSize lane = 0; lane < num_active; lane++) {
          CeedPragmaAtomic vv[impl->offsets[o_row + lane] + v_shift] += row_sum[lane];
        }
      }
    }
  }
  return CEED_ERROR_SUCCESS;
}
3169e82028bSJeremy L Thompson 
// Transpose application of a curl-oriented restriction using the absolute values of the curl_orients entries.
// Blocks [start, stop) are processed; for every component and every row n a block-local result is assembled
// in vv_loc and then summed into vv at the index read from impl->offsets plus k * comp_stride.
static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core(
    CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop,
    CeedInt num_elem, CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) {
  // Restriction with (unsigned) tridiagonal transformation
  CeedElemRestriction_Memcheck *impl;
  CeedScalar                    vv_loc[block_size];

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
  for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) {
    // Iteration bound set to discard padding elements
    const CeedSize block_end   = CeedIntMin(block_size, num_elem - e);
    // Start of this block's entries in curl_orients (three entries per node) and in offsets
    const CeedSize orient_base = e * 3 * elem_size;
    const CeedSize offset_base = e * elem_size;

    for (CeedSize k = 0; k < num_comp; k++) {
      // Index of (component k, row 0, lane 0) of this block in uu, relative to the start of the applied range
      const CeedSize u_base   = e * elem_size * num_comp + k * elem_size * block_size - v_offset;
      const CeedSize l_stride = k * comp_stride;
      CeedSize       n        = 0;

      // First row (n = 0): two-term combination of rows n and n + 1
      CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
        vv_loc[j] = uu[u_base + n * block_size + j] * abs(impl->curl_orients[j + (3 * n + 1) * block_size + orient_base]) +
                    uu[u_base + (n + 1) * block_size + j] * abs(impl->curl_orients[j + (3 * n + 3) * block_size + orient_base]);
      }
      for (CeedSize j = 0; j < block_end; j++) {
        CeedPragmaAtomic vv[impl->offsets[j + n * block_size + offset_base] + l_stride] += vv_loc[j];
      }

      // Interior rows: three-term combination of rows n - 1, n and n + 1
      n = 1;
      while (n < elem_size - 1) {
        CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
          vv_loc[j] = uu[u_base + (n - 1) * block_size + j] * abs(impl->curl_orients[j + (3 * n - 1) * block_size + orient_base]) +
                      uu[u_base + n * block_size + j] * abs(impl->curl_orients[j + (3 * n + 1) * block_size + orient_base]) +
                      uu[u_base + (n + 1) * block_size + j] * abs(impl->curl_orients[j + (3 * n + 3) * block_size + orient_base]);
        }
        for (CeedSize j = 0; j < block_end; j++) {
          CeedPragmaAtomic vv[impl->offsets[j + n * block_size + offset_base] + l_stride] += vv_loc[j];
        }
        n++;
      }

      // Last row (n as left by the loop above): two-term combination of rows n - 1 and n
      CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) {
        vv_loc[j] = uu[u_base + (n - 1) * block_size + j] * abs(impl->curl_orients[j + (3 * n - 1) * block_size + orient_base]) +
                    uu[u_base + n * block_size + j] * abs(impl->curl_orients[j + (3 * n + 1) * block_size + orient_base]);
      }
      for (CeedSize j = 0; j < block_end; j++) {
        CeedPragmaAtomic vv[impl->offsets[j + n * block_size + offset_base] + l_stride] += vv_loc[j];
      }
    }
  }
  return CEED_ERROR_SUCCESS;
}
3669e82028bSJeremy L Thompson 
// Apply an at-points restriction for elements [start, stop).
// For each element, impl->offsets[e] gives the position in impl->offsets where that element's point indices begin.
// In CEED_NOTRANSPOSE mode values are copied from uu into vv; otherwise values from uu are summed into vv.
// The position within the element-side array advances by num_points * num_comp after every element.
static inline int CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start,
                                                                          CeedInt stop, CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu,
                                                                          CeedScalar *__restrict__ vv) {
  const bool                    is_gather    = t_mode == CEED_NOTRANSPOSE;
  CeedSize                      e_vec_offset = 0;
  CeedElemRestriction_Memcheck *impl;

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
  for (CeedSize e = start; e < stop; e++) {
    CeedInt       num_points;
    const CeedInt l_vec_offset = impl->offsets[e];

    CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points));
    if (is_gather) {
      // vv (element side) = uu (point-indexed side)
      for (CeedSize i = 0; i < num_points; i++) {
        for (CeedSize j = 0; j < num_comp; j++) {
          vv[j * num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j];
        }
      }
    } else {
      // vv (point-indexed side) += uu (element side)
      for (CeedSize i = 0; i < num_points; i++) {
        for (CeedSize j = 0; j < num_comp; j++) {
          vv[impl->offsets[i + l_vec_offset] * num_comp + j] += uu[j * num_points + i + e_vec_offset];
        }
      }
    }
    e_vec_offset += num_points * (CeedSize)num_comp;
  }
  return CEED_ERROR_SUCCESS;
}
3919e82028bSJeremy L Thompson 
// Core restriction application for blocks [start, stop).
//
// Obtains host arrays for u (read) and v (read/write in transpose mode, write-only otherwise), dispatches to the
// kernel matching the restriction type and the use_signs / use_orients flags, then restores both arrays.
//   - use_signs   selects the signed kernels for oriented and curl-oriented restrictions
//   - use_orients selects the (signed or unsigned) curl-oriented kernels; when false the plain offset kernel is used
// If request is neither CEED_REQUEST_IMMEDIATE nor CEED_REQUEST_ORDERED, *request is set to NULL before returning.
static inline int CeedElemRestrictionApply_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size,
                                                         const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode,
                                                         bool use_signs, bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) {
  CeedInt             num_elem, elem_size;
  CeedSize            v_offset;
  CeedRestrictionType rstr_type;
  const CeedScalar   *uu;
  CeedScalar         *vv;

  CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
  CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
  // Widen the first operand so the whole product is evaluated in CeedSize;
  // otherwise start * block_size * elem_size is computed in CeedInt and may overflow
  v_offset = (CeedSize)start * block_size * elem_size * num_comp;
  CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
  CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));

  if (t_mode == CEED_TRANSPOSE) {
    // Sum into for transpose mode, E-vector to L-vector
    CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
  } else {
    // Overwrite for notranspose mode, L-vector to E-vector
    CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
  }

  if (t_mode == CEED_TRANSPOSE) {
    // Restriction from E-vector to L-vector
    // Performing v += r^T * u
    // uu has shape [elem_size, num_comp, num_elem], row-major
    // vv has shape [nnodes, num_comp]
    // Sum into for transpose mode
    switch (rstr_type) {
      case CEED_RESTRICTION_STRIDED:
        CeedCallBackend(CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset,
                                                                               uu, vv));
        break;
      case CEED_RESTRICTION_STANDARD:
        CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
                                                                              elem_size, v_offset, uu, vv));
        break;
      case CEED_RESTRICTION_ORIENTED:
        if (use_signs) {
          CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
                                                                                  elem_size, v_offset, uu, vv));
        } else {
          CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
                                                                                elem_size, v_offset, uu, vv));
        }
        break;
      case CEED_RESTRICTION_CURL_ORIENTED:
        if (use_signs && use_orients) {
          CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
                                                                                      elem_size, v_offset, uu, vv));
        } else if (use_orients) {
          CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop,
                                                                                              num_elem, elem_size, v_offset, uu, vv));
        } else {
          CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
                                                                                elem_size, v_offset, uu, vv));
        }
        break;
      case CEED_RESTRICTION_POINTS:
        CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
        break;
    }
  } else {
    // Restriction from L-vector to E-vector
    // Perform: v = r * u
    // vv has shape [elem_size, num_comp, num_elem], row-major
    // uu has shape [nnodes, num_comp]
    // Overwrite for notranspose mode
    switch (rstr_type) {
      case CEED_RESTRICTION_STRIDED:
        CeedCallBackend(CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size,
                                                                                 v_offset, uu, vv));
        break;
      case CEED_RESTRICTION_STANDARD:
        CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
                                                                                elem_size, v_offset, uu, vv));
        break;
      case CEED_RESTRICTION_ORIENTED:
        if (use_signs) {
          CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
                                                                                    elem_size, v_offset, uu, vv));
        } else {
          CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
                                                                                  elem_size, v_offset, uu, vv));
        }
        break;
      case CEED_RESTRICTION_CURL_ORIENTED:
        if (use_signs && use_orients) {
          CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop,
                                                                                        num_elem, elem_size, v_offset, uu, vv));
        } else if (use_orients) {
          CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop,
                                                                                                num_elem, elem_size, v_offset, uu, vv));
        } else {
          CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem,
                                                                                  elem_size, v_offset, uu, vv));
        }
        break;
      case CEED_RESTRICTION_POINTS:
        CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv));
        break;
    }
  }
  CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
  CeedCallBackend(CeedVectorRestoreArray(v, &vv));
  if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
  return CEED_ERROR_SUCCESS;
}
5019e82028bSJeremy L Thompson 
5029e82028bSJeremy L Thompson //------------------------------------------------------------------------------
5039e82028bSJeremy L Thompson // ElemRestriction Apply
5049e82028bSJeremy L Thompson //------------------------------------------------------------------------------
// Apply the restriction over all blocks [0, num_block) with both signs and orientations enabled.
// The work is forwarded to the Apply function pointer stored in the backend data.
static int CeedElemRestrictionApply_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
  const bool                    use_signs = true, use_orients = true;
  CeedInt                       num_block, block_size, num_comp, comp_stride;
  CeedElemRestriction_Memcheck *impl;

  // Gather the restriction dimensions needed by the core routine
  CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
  CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
  CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
  CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));

  CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, use_signs, use_orients, u, v, request));
  return CEED_ERROR_SUCCESS;
}
5179e82028bSJeremy L Thompson 
5189e82028bSJeremy L Thompson //------------------------------------------------------------------------------
5199e82028bSJeremy L Thompson // ElemRestriction Apply Unsigned
5209e82028bSJeremy L Thompson //------------------------------------------------------------------------------
// Apply the restriction over all blocks [0, num_block) with signs disabled and orientations enabled.
// The work is forwarded to the Apply function pointer stored in the backend data.
static int CeedElemRestrictionApplyUnsigned_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
                                                     CeedRequest *request) {
  const bool                    use_signs = false, use_orients = true;
  CeedInt                       num_block, block_size, num_comp, comp_stride;
  CeedElemRestriction_Memcheck *impl;

  // Gather the restriction dimensions needed by the core routine
  CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
  CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
  CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
  CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));

  CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, use_signs, use_orients, u, v, request));
  return CEED_ERROR_SUCCESS;
}
5349e82028bSJeremy L Thompson 
5359e82028bSJeremy L Thompson //------------------------------------------------------------------------------
5369e82028bSJeremy L Thompson // ElemRestriction Apply Unoriented
5379e82028bSJeremy L Thompson //------------------------------------------------------------------------------
// Apply the restriction over all blocks [0, num_block) with both signs and orientations disabled.
// The work is forwarded to the Apply function pointer stored in the backend data.
static int CeedElemRestrictionApplyUnoriented_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
                                                       CeedRequest *request) {
  const bool                    use_signs = false, use_orients = false;
  CeedInt                       num_block, block_size, num_comp, comp_stride;
  CeedElemRestriction_Memcheck *impl;

  // Gather the restriction dimensions needed by the core routine
  CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
  CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
  CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
  CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));

  CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, use_signs, use_orients, u, v, request));
  return CEED_ERROR_SUCCESS;
}
5519e82028bSJeremy L Thompson 
5529e82028bSJeremy L Thompson //------------------------------------------------------------------------------
5539e82028bSJeremy L Thompson // ElemRestriction Apply Points
5549e82028bSJeremy L Thompson //------------------------------------------------------------------------------
// Apply the restriction for the single element `elem` (range [elem, elem + 1)).
// Forwards to the Apply function pointer in the backend data with block size 0, component stride 1,
// and both signs and orientations disabled; the result of that call is returned unchanged.
static int CeedElemRestrictionApplyAtPointsInElement_Memcheck(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u,
                                                              CeedVector v, CeedRequest *request) {
  const CeedInt                 block_size = 0, comp_stride = 1;
  const bool                    use_signs = false, use_orients = false;
  CeedInt                       num_comp;
  CeedElemRestriction_Memcheck *impl;

  CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
  return impl->Apply(rstr, num_comp, block_size, comp_stride, elem, elem + 1, t_mode, use_signs, use_orients, u, v, request);
}
5649e82028bSJeremy L Thompson 
5659e82028bSJeremy L Thompson //------------------------------------------------------------------------------
5669e82028bSJeremy L Thompson // ElemRestriction Apply Block
5679e82028bSJeremy L Thompson //------------------------------------------------------------------------------
// Apply the restriction for the single block `block` (range [block, block + 1)) with signs and orientations enabled.
// The work is forwarded to the Apply function pointer stored in the backend data.
static int CeedElemRestrictionApplyBlock_Memcheck(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
                                                  CeedRequest *request) {
  const bool                    use_signs = true, use_orients = true;
  CeedInt                       block_size, num_comp, comp_stride;
  CeedElemRestriction_Memcheck *impl;

  // Gather the restriction dimensions needed by the core routine
  CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
  CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
  CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
  CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));

  CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, use_signs, use_orients, u, v, request));
  return CEED_ERROR_SUCCESS;
}
5809e82028bSJeremy L Thompson 
5819e82028bSJeremy L Thompson //------------------------------------------------------------------------------
5829e82028bSJeremy L Thompson // ElemRestriction Get Offsets
5839e82028bSJeremy L Thompson //------------------------------------------------------------------------------
// Hand back the offsets array stored in the backend data.
// Fails with CEED_ERROR_BACKEND unless mem_type is CEED_MEM_HOST.
static int CeedElemRestrictionGetOffsets_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
  CeedElemRestriction_Memcheck *rstr_data;

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &rstr_data));
  {
    const bool is_host = mem_type == CEED_MEM_HOST;

    CeedCheck(is_host, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
  }
  *offsets = rstr_data->offsets;
  return CEED_ERROR_SUCCESS;
}
5949e82028bSJeremy L Thompson 
5959e82028bSJeremy L Thompson //------------------------------------------------------------------------------
5969e82028bSJeremy L Thompson // ElemRestriction Get Orientations
5979e82028bSJeremy L Thompson //------------------------------------------------------------------------------
// Hand back the orientations array stored in the backend data.
// Fails with CEED_ERROR_BACKEND unless mem_type is CEED_MEM_HOST.
static int CeedElemRestrictionGetOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) {
  CeedElemRestriction_Memcheck *rstr_data;

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &rstr_data));
  {
    const bool is_host = mem_type == CEED_MEM_HOST;

    CeedCheck(is_host, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
  }
  *orients = rstr_data->orients;
  return CEED_ERROR_SUCCESS;
}
6089e82028bSJeremy L Thompson 
6099e82028bSJeremy L Thompson //------------------------------------------------------------------------------
6109e82028bSJeremy L Thompson // ElemRestriction Get Curl-Conforming Orientations
6119e82028bSJeremy L Thompson //------------------------------------------------------------------------------
// Hand back the curl-conforming orientations array stored in the backend data.
// Fails with CEED_ERROR_BACKEND unless mem_type is CEED_MEM_HOST.
static int CeedElemRestrictionGetCurlOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) {
  CeedElemRestriction_Memcheck *rstr_data;

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &rstr_data));
  {
    const bool is_host = mem_type == CEED_MEM_HOST;

    CeedCheck(is_host, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory");
  }
  *curl_orients = rstr_data->curl_orients;
  return CEED_ERROR_SUCCESS;
}
6229e82028bSJeremy L Thompson 
6239e82028bSJeremy L Thompson //------------------------------------------------------------------------------
6249e82028bSJeremy L Thompson // ElemRestriction Destroy
6259e82028bSJeremy L Thompson //------------------------------------------------------------------------------
// Release the backend data of a restriction: the three *_allocated arrays first, then the data struct itself.
static int CeedElemRestrictionDestroy_Memcheck(CeedElemRestriction rstr) {
  CeedElemRestriction_Memcheck *rstr_data;

  CeedCallBackend(CeedElemRestrictionGetData(rstr, &rstr_data));

  // Arrays held by the backend data
  CeedCallBackend(CeedFree(&rstr_data->offsets_allocated));
  CeedCallBackend(CeedFree(&rstr_data->orients_allocated));
  CeedCallBackend(CeedFree(&rstr_data->curl_orients_allocated));

  // Backend data struct
  CeedCallBackend(CeedFree(&rstr_data));
  return CEED_ERROR_SUCCESS;
}
6369e82028bSJeremy L Thompson 
6379e82028bSJeremy L Thompson //------------------------------------------------------------------------------
6389e82028bSJeremy L Thompson // ElemRestriction Create
6399e82028bSJeremy L Thompson //------------------------------------------------------------------------------
CeedElemRestrictionCreate_Memcheck(CeedMemType mem_type,CeedCopyMode copy_mode,const CeedInt * offsets,const bool * orients,const CeedInt8 * curl_orients,CeedElemRestriction rstr)6409e82028bSJeremy L Thompson int CeedElemRestrictionCreate_Memcheck(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients,
6419e82028bSJeremy L Thompson                                        const CeedInt8 *curl_orients, CeedElemRestriction rstr) {
6429e82028bSJeremy L Thompson   Ceed                          ceed;
6439e82028bSJeremy L Thompson   CeedInt                       num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets;
6449e82028bSJeremy L Thompson   CeedRestrictionType           rstr_type;
6459e82028bSJeremy L Thompson   CeedElemRestriction_Memcheck *impl;
6469e82028bSJeremy L Thompson 
6479e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
6489e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
6499e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
6509e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block));
6519e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size));
6529e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
6539e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
65422eb1385SJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
6559e82028bSJeremy L Thompson 
6569e82028bSJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
6579e82028bSJeremy L Thompson 
6589e82028bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &impl));
6599e82028bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetData(rstr, impl));
66022eb1385SJeremy L Thompson 
66122eb1385SJeremy L Thompson   // Set layouts
66222eb1385SJeremy L Thompson   {
66322eb1385SJeremy L Thompson     bool    has_backend_strides;
66422eb1385SJeremy L Thompson     CeedInt e_layout[3] = {1, elem_size, elem_size * num_comp}, l_layout[3] = {0};
66522eb1385SJeremy L Thompson 
66622eb1385SJeremy L Thompson     CeedCallBackend(CeedElemRestrictionSetELayout(rstr, e_layout));
66722eb1385SJeremy L Thompson     if (rstr_type == CEED_RESTRICTION_STRIDED) {
66822eb1385SJeremy L Thompson       CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides));
66922eb1385SJeremy L Thompson       if (has_backend_strides) {
67022eb1385SJeremy L Thompson         CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, l_layout));
67122eb1385SJeremy L Thompson         CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, l_layout));
67222eb1385SJeremy L Thompson       }
67322eb1385SJeremy L Thompson     }
67422eb1385SJeremy L Thompson   }
6759e82028bSJeremy L Thompson 
676ff1bc20eSJeremy L Thompson   // Expand E-vector size for AtPoints
677ff1bc20eSJeremy L Thompson   if (rstr_type == CEED_RESTRICTION_POINTS) {
678ff1bc20eSJeremy L Thompson     CeedSize max_points = 0, num_points_total = 0;
679ff1bc20eSJeremy L Thompson 
680ff1bc20eSJeremy L Thompson     for (CeedInt i = 0; i < num_elem; i++) {
681ff1bc20eSJeremy L Thompson       CeedInt num_points = offsets[i + 1] - offsets[i];
682ff1bc20eSJeremy L Thompson 
683ff1bc20eSJeremy L Thompson       max_points = CeedIntMax(max_points, num_points);
684ff1bc20eSJeremy L Thompson       num_points_total += num_points;
685ff1bc20eSJeremy L Thompson     }
686ff1bc20eSJeremy L Thompson     // -- Increase size for last element
687ff1bc20eSJeremy L Thompson     num_points_total += (max_points - (offsets[num_elem] - offsets[num_elem - 1]));
688ff1bc20eSJeremy L Thompson     CeedCallBackend(CeedElemRestrictionSetAtPointsEVectorSize(rstr, num_points_total * num_comp));
689ff1bc20eSJeremy L Thompson   }
690ff1bc20eSJeremy L Thompson 
6919e82028bSJeremy L Thompson   // Offsets data
6929e82028bSJeremy L Thompson   if (rstr_type != CEED_RESTRICTION_STRIDED) {
6931ba74105SJeremy L Thompson     // Check indices
6949e82028bSJeremy L Thompson     {
6959e82028bSJeremy L Thompson       CeedSize l_size;
6969e82028bSJeremy L Thompson 
6979e82028bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
6989e82028bSJeremy L Thompson       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
6999e82028bSJeremy L Thompson         CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
7009e82028bSJeremy L Thompson                   "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
7019e82028bSJeremy L Thompson       }
7029e82028bSJeremy L Thompson     }
7039e82028bSJeremy L Thompson 
7049e82028bSJeremy L Thompson     // Copy data
7059e82028bSJeremy L Thompson     if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points));
7069e82028bSJeremy L Thompson     num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size);
7079e82028bSJeremy L Thompson     switch (copy_mode) {
7089e82028bSJeremy L Thompson       case CEED_COPY_VALUES:
7099e82028bSJeremy L Thompson         CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated));
7109e82028bSJeremy L Thompson         memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0]));
7119e82028bSJeremy L Thompson         impl->offsets = impl->offsets_allocated;
7129e82028bSJeremy L Thompson         break;
7139e82028bSJeremy L Thompson       case CEED_OWN_POINTER:
7149e82028bSJeremy L Thompson         impl->offsets_allocated = (CeedInt *)offsets;
7159e82028bSJeremy L Thompson         impl->offsets           = impl->offsets_allocated;
7169e82028bSJeremy L Thompson         break;
7179e82028bSJeremy L Thompson       case CEED_USE_POINTER:
7189e82028bSJeremy L Thompson         impl->offsets = offsets;
7199e82028bSJeremy L Thompson     }
7209e82028bSJeremy L Thompson 
7219e82028bSJeremy L Thompson     // Orientation data
7229e82028bSJeremy L Thompson     if (rstr_type == CEED_RESTRICTION_ORIENTED) {
7239e82028bSJeremy L Thompson       CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction");
7249e82028bSJeremy L Thompson       switch (copy_mode) {
7259e82028bSJeremy L Thompson         case CEED_COPY_VALUES:
7269e82028bSJeremy L Thompson           CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated));
7279e82028bSJeremy L Thompson           memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0]));
7289e82028bSJeremy L Thompson           impl->orients = impl->orients_allocated;
7299e82028bSJeremy L Thompson           break;
7309e82028bSJeremy L Thompson         case CEED_OWN_POINTER:
7319e82028bSJeremy L Thompson           impl->orients_allocated = (bool *)orients;
7329e82028bSJeremy L Thompson           impl->orients           = impl->orients_allocated;
7339e82028bSJeremy L Thompson           break;
7349e82028bSJeremy L Thompson         case CEED_USE_POINTER:
7359e82028bSJeremy L Thompson           impl->orients = orients;
7369e82028bSJeremy L Thompson       }
7379e82028bSJeremy L Thompson     } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
7389e82028bSJeremy L Thompson       CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction");
7399e82028bSJeremy L Thompson       switch (copy_mode) {
7409e82028bSJeremy L Thompson         case CEED_COPY_VALUES:
7419e82028bSJeremy L Thompson           CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated));
7429e82028bSJeremy L Thompson           memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0]));
7439e82028bSJeremy L Thompson           impl->curl_orients = impl->curl_orients_allocated;
7449e82028bSJeremy L Thompson           break;
7459e82028bSJeremy L Thompson         case CEED_OWN_POINTER:
7469e82028bSJeremy L Thompson           impl->curl_orients_allocated = (CeedInt8 *)curl_orients;
7479e82028bSJeremy L Thompson           impl->curl_orients           = impl->curl_orients_allocated;
7489e82028bSJeremy L Thompson           break;
7499e82028bSJeremy L Thompson         case CEED_USE_POINTER:
7509e82028bSJeremy L Thompson           impl->curl_orients = curl_orients;
7519e82028bSJeremy L Thompson       }
7529e82028bSJeremy L Thompson     }
7539e82028bSJeremy L Thompson   }
7549e82028bSJeremy L Thompson 
7559e82028bSJeremy L Thompson   // Set apply function
7569e82028bSJeremy L Thompson   impl->Apply = CeedElemRestrictionApply_Memcheck_Core;
7579e82028bSJeremy L Thompson 
7589e82028bSJeremy L Thompson   // Register backend functions
7599e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Memcheck));
7609e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Memcheck));
7619e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Memcheck));
7629e82028bSJeremy L Thompson   if (rstr_type == CEED_RESTRICTION_POINTS) {
7631a8516d0SJames Wright     CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement",
7641a8516d0SJames Wright                                            CeedElemRestrictionApplyAtPointsInElement_Memcheck));
7659e82028bSJeremy L Thompson   }
7669e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Memcheck));
7679e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Memcheck));
7689e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Memcheck));
7699e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Memcheck));
7709e82028bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Memcheck));
7719bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
7729e82028bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
7739e82028bSJeremy L Thompson }
7749e82028bSJeremy L Thompson 
7759e82028bSJeremy L Thompson //------------------------------------------------------------------------------
776