1*9e82028bSJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2*9e82028bSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3*9e82028bSJeremy L Thompson // 4*9e82028bSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 5*9e82028bSJeremy L Thompson // 6*9e82028bSJeremy L Thompson // This file is part of CEED: http://github.com/ceed 7*9e82028bSJeremy L Thompson 8*9e82028bSJeremy L Thompson #include <ceed.h> 9*9e82028bSJeremy L Thompson #include <ceed/backend.h> 10*9e82028bSJeremy L Thompson #include <stdbool.h> 11*9e82028bSJeremy L Thompson #include <stdlib.h> 12*9e82028bSJeremy L Thompson #include <string.h> 13*9e82028bSJeremy L Thompson 14*9e82028bSJeremy L Thompson #include "ceed-memcheck.h" 15*9e82028bSJeremy L Thompson 16*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 17*9e82028bSJeremy L Thompson // Set backend strides 18*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 19*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionGetBackendStrides_Memcheck(CeedElemRestriction rstr, CeedInt strides[3]) { 20*9e82028bSJeremy L Thompson CeedInt elem_size, num_comp, num_elem; 21*9e82028bSJeremy L Thompson 22*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 23*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 24*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 25*9e82028bSJeremy L Thompson // Memcheck default, contiguous by component, then node 26*9e82028bSJeremy L Thompson strides[0] = num_comp; 27*9e82028bSJeremy L Thompson strides[1] = 1; 28*9e82028bSJeremy L Thompson strides[2] = num_comp * elem_size; 29*9e82028bSJeremy L Thompson /** 30*9e82028bSJeremy L Thompson // CPU default, contiguous by node, then component 31*9e82028bSJeremy L Thompson strides[0] = 1; 32*9e82028bSJeremy L Thompson strides[1] = elem_size; 33*9e82028bSJeremy L Thompson strides[2] = elem_size * num_comp; 34*9e82028bSJeremy L Thompson 35*9e82028bSJeremy L Thompson // GPU default, contiguous by node, then element 36*9e82028bSJeremy L Thompson strides[0] = 1; 37*9e82028bSJeremy L Thompson strides[1] = num_elem * elem_size; 38*9e82028bSJeremy L Thompson strides[2] = elem_size; 39*9e82028bSJeremy L Thompson **/ 40*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 41*9e82028bSJeremy L Thompson } 42*9e82028bSJeremy L Thompson 43*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 44*9e82028bSJeremy L Thompson // Core ElemRestriction Apply Code 45*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 46*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 47*9e82028bSJeremy L Thompson CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 48*9e82028bSJeremy L Thompson CeedInt v_offset, const CeedScalar *__restrict__ uu, 49*9e82028bSJeremy L Thompson CeedScalar *__restrict__ vv) { 50*9e82028bSJeremy L Thompson // Get strides 51*9e82028bSJeremy L Thompson bool has_backend_strides; 52*9e82028bSJeremy L Thompson CeedInt strides[3] = {0}; 53*9e82028bSJeremy L Thompson 54*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 55*9e82028bSJeremy L Thompson if (has_backend_strides) CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, strides)); 56*9e82028bSJeremy L Thompson else CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 57*9e82028bSJeremy L Thompson 58*9e82028bSJeremy L Thompson // Apply restriction 59*9e82028bSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 60*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 61*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 62*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 63*9e82028bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 64*9e82028bSJeremy L Thompson uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 65*9e82028bSJeremy L Thompson } 66*9e82028bSJeremy L Thompson } 67*9e82028bSJeremy L Thompson } 68*9e82028bSJeremy L Thompson } 69*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 70*9e82028bSJeremy L Thompson } 71*9e82028bSJeremy L Thompson 72*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 73*9e82028bSJeremy L Thompson const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 74*9e82028bSJeremy L Thompson CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 75*9e82028bSJeremy L Thompson CeedScalar *__restrict__ vv) { 76*9e82028bSJeremy L Thompson // Default restriction with offsets 77*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 78*9e82028bSJeremy L Thompson 79*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 80*9e82028bSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 81*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 82*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 83*9e82028bSJeremy L Thompson vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 84*9e82028bSJeremy L Thompson } 85*9e82028bSJeremy L Thompson } 86*9e82028bSJeremy L Thompson } 87*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 88*9e82028bSJeremy L Thompson } 89*9e82028bSJeremy L Thompson 90*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, 91*9e82028bSJeremy L Thompson const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 92*9e82028bSJeremy L Thompson CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, 93*9e82028bSJeremy L Thompson const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 94*9e82028bSJeremy L Thompson // Restriction with orientations 95*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 96*9e82028bSJeremy L Thompson 97*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 98*9e82028bSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 99*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 100*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 101*9e82028bSJeremy L Thompson vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = 102*9e82028bSJeremy L Thompson uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 103*9e82028bSJeremy L Thompson } 104*9e82028bSJeremy L Thompson } 105*9e82028bSJeremy L Thompson } 106*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 107*9e82028bSJeremy L Thompson } 108*9e82028bSJeremy L Thompson 109*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, 110*9e82028bSJeremy L Thompson const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 111*9e82028bSJeremy L Thompson CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, 112*9e82028bSJeremy L Thompson const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 113*9e82028bSJeremy L Thompson // Restriction with tridiagonal transformation 114*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 115*9e82028bSJeremy L Thompson 116*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 117*9e82028bSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 118*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 119*9e82028bSJeremy L Thompson CeedInt n = 0; 120*9e82028bSJeremy L Thompson 121*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 122*9e82028bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 123*9e82028bSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 124*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 125*9e82028bSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 126*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 127*9e82028bSJeremy L Thompson } 128*9e82028bSJeremy L Thompson CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { 129*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 130*9e82028bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 131*9e82028bSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 132*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 133*9e82028bSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 134*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 135*9e82028bSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 136*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 137*9e82028bSJeremy L Thompson } 138*9e82028bSJeremy L Thompson } 139*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 140*9e82028bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 141*9e82028bSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 142*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 143*9e82028bSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 144*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 145*9e82028bSJeremy L Thompson } 146*9e82028bSJeremy L Thompson } 147*9e82028bSJeremy L Thompson } 148*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 149*9e82028bSJeremy L Thompson } 150*9e82028bSJeremy L Thompson 151*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core( 152*9e82028bSJeremy L Thompson CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop, 153*9e82028bSJeremy L Thompson CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 154*9e82028bSJeremy L Thompson // Restriction with (unsigned) tridiagonal transformation 155*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 156*9e82028bSJeremy L Thompson 157*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 158*9e82028bSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 159*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 160*9e82028bSJeremy L Thompson CeedInt n = 0; 161*9e82028bSJeremy L Thompson 162*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 163*9e82028bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 164*9e82028bSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 165*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 166*9e82028bSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 167*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 168*9e82028bSJeremy L Thompson } 169*9e82028bSJeremy L Thompson CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { 170*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 171*9e82028bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 172*9e82028bSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 173*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 174*9e82028bSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 175*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 176*9e82028bSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 177*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 178*9e82028bSJeremy L Thompson } 179*9e82028bSJeremy L Thompson } 180*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 181*9e82028bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 182*9e82028bSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 183*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 184*9e82028bSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 185*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 186*9e82028bSJeremy L Thompson } 187*9e82028bSJeremy L Thompson } 188*9e82028bSJeremy L Thompson } 189*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 190*9e82028bSJeremy L Thompson } 191*9e82028bSJeremy L Thompson 192*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 193*9e82028bSJeremy L Thompson CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 194*9e82028bSJeremy L Thompson CeedInt v_offset, const CeedScalar *__restrict__ uu, 195*9e82028bSJeremy L Thompson CeedScalar *__restrict__ vv) { 196*9e82028bSJeremy L Thompson // Get strides 197*9e82028bSJeremy L Thompson bool has_backend_strides; 198*9e82028bSJeremy L Thompson CeedInt strides[3] = {0}; 199*9e82028bSJeremy L Thompson 200*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 201*9e82028bSJeremy L Thompson if (has_backend_strides) CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, strides)); 202*9e82028bSJeremy L Thompson else CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 203*9e82028bSJeremy L Thompson 204*9e82028bSJeremy L Thompson // Apply restriction 205*9e82028bSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 206*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 207*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 208*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 209*9e82028bSJeremy L Thompson vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 210*9e82028bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 211*9e82028bSJeremy L Thompson } 212*9e82028bSJeremy L Thompson } 213*9e82028bSJeremy L Thompson } 214*9e82028bSJeremy L Thompson } 215*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 216*9e82028bSJeremy L Thompson } 217*9e82028bSJeremy L Thompson 218*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 219*9e82028bSJeremy L Thompson const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 220*9e82028bSJeremy L Thompson CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 221*9e82028bSJeremy L Thompson CeedScalar *__restrict__ vv) { 222*9e82028bSJeremy L Thompson // Default restriction with offsets 223*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 224*9e82028bSJeremy L Thompson 225*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 226*9e82028bSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 227*9e82028bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 228*9e82028bSJeremy L Thompson for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 229*9e82028bSJeremy L Thompson // Iteration bound set to discard padding elements 230*9e82028bSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 231*9e82028bSJeremy L Thompson CeedScalar vv_loc; 232*9e82028bSJeremy L Thompson 233*9e82028bSJeremy L Thompson vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset]; 234*9e82028bSJeremy L Thompson CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 235*9e82028bSJeremy L Thompson } 236*9e82028bSJeremy L Thompson } 237*9e82028bSJeremy L Thompson } 238*9e82028bSJeremy L Thompson } 239*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 240*9e82028bSJeremy L Thompson } 241*9e82028bSJeremy L Thompson 242*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 243*9e82028bSJeremy L Thompson const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 244*9e82028bSJeremy L Thompson CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 245*9e82028bSJeremy L Thompson CeedScalar *__restrict__ vv) { 246*9e82028bSJeremy L Thompson // Restriction with orientations 247*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 248*9e82028bSJeremy L Thompson 249*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 250*9e82028bSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 251*9e82028bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 252*9e82028bSJeremy L Thompson for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 253*9e82028bSJeremy L Thompson // Iteration bound set to discard padding elements 254*9e82028bSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 255*9e82028bSJeremy L Thompson CeedScalar vv_loc; 256*9e82028bSJeremy L Thompson 257*9e82028bSJeremy L Thompson vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 258*9e82028bSJeremy L Thompson CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 259*9e82028bSJeremy L Thompson } 260*9e82028bSJeremy L Thompson } 261*9e82028bSJeremy L Thompson } 262*9e82028bSJeremy L Thompson } 263*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 264*9e82028bSJeremy L Thompson } 265*9e82028bSJeremy L Thompson 266*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, 267*9e82028bSJeremy L Thompson const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 268*9e82028bSJeremy L Thompson CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, 269*9e82028bSJeremy L Thompson const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 270*9e82028bSJeremy L Thompson // Restriction with tridiagonal transformation 271*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 272*9e82028bSJeremy L Thompson CeedScalar vv_loc[block_size]; 273*9e82028bSJeremy L Thompson 274*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 275*9e82028bSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 276*9e82028bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 277*9e82028bSJeremy L Thompson // Iteration bound set to discard padding elements 278*9e82028bSJeremy L Thompson const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 279*9e82028bSJeremy L Thompson CeedInt n = 0; 280*9e82028bSJeremy L Thompson 281*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 282*9e82028bSJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 283*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 284*9e82028bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 285*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 286*9e82028bSJeremy L Thompson } 287*9e82028bSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 288*9e82028bSJeremy L Thompson CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 289*9e82028bSJeremy L Thompson } 290*9e82028bSJeremy L Thompson for (n = 1; n < elem_size - 1; n++) { 291*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 292*9e82028bSJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 293*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 294*9e82028bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 295*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 296*9e82028bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 297*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 298*9e82028bSJeremy L Thompson } 299*9e82028bSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 300*9e82028bSJeremy L Thompson CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 301*9e82028bSJeremy L Thompson } 302*9e82028bSJeremy L Thompson } 303*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 304*9e82028bSJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 305*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 306*9e82028bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 307*9e82028bSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 308*9e82028bSJeremy L Thompson } 309*9e82028bSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 310*9e82028bSJeremy L Thompson CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 311*9e82028bSJeremy L Thompson } 312*9e82028bSJeremy L Thompson } 313*9e82028bSJeremy L Thompson } 314*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 315*9e82028bSJeremy L Thompson } 316*9e82028bSJeremy L Thompson 317*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core( 318*9e82028bSJeremy L Thompson CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop, 319*9e82028bSJeremy L Thompson CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 320*9e82028bSJeremy L Thompson // Restriction with (unsigned) tridiagonal transformation 321*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 322*9e82028bSJeremy L Thompson CeedScalar vv_loc[block_size]; 323*9e82028bSJeremy L Thompson 324*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 325*9e82028bSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 326*9e82028bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 327*9e82028bSJeremy L Thompson // Iteration bound set to discard padding elements 328*9e82028bSJeremy L Thompson const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 329*9e82028bSJeremy L Thompson CeedInt n = 0; 330*9e82028bSJeremy L Thompson 331*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 332*9e82028bSJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 333*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 334*9e82028bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 335*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 336*9e82028bSJeremy L Thompson } 337*9e82028bSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 338*9e82028bSJeremy L Thompson CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 339*9e82028bSJeremy L Thompson } 340*9e82028bSJeremy L Thompson for (n = 1; n < elem_size - 1; n++) { 341*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 342*9e82028bSJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 343*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 344*9e82028bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 345*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 346*9e82028bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 347*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 348*9e82028bSJeremy L Thompson } 349*9e82028bSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 350*9e82028bSJeremy L Thompson CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 351*9e82028bSJeremy L Thompson } 352*9e82028bSJeremy L Thompson } 353*9e82028bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 354*9e82028bSJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 355*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 356*9e82028bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 357*9e82028bSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 358*9e82028bSJeremy L Thompson } 359*9e82028bSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 360*9e82028bSJeremy L Thompson CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 361*9e82028bSJeremy L Thompson } 362*9e82028bSJeremy L Thompson } 363*9e82028bSJeremy L Thompson } 364*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 365*9e82028bSJeremy L Thompson } 366*9e82028bSJeremy L Thompson 367*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, 368*9e82028bSJeremy L Thompson CeedInt stop, CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu, 369*9e82028bSJeremy L Thompson CeedScalar *__restrict__ vv) { 370*9e82028bSJeremy L Thompson CeedInt num_points, l_vec_offset, e_vec_offset = 0; 371*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 372*9e82028bSJeremy L Thompson 373*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 374*9e82028bSJeremy L Thompson for (CeedInt e = start; e < stop; e++) { 375*9e82028bSJeremy L Thompson l_vec_offset = impl->offsets[e]; 376*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points)); 377*9e82028bSJeremy L Thompson if (t_mode == CEED_NOTRANSPOSE) { 378*9e82028bSJeremy L Thompson for (CeedInt i = 0; i < num_points; i++) { 379*9e82028bSJeremy L Thompson for (CeedInt j = 0; j < num_comp; j++) vv[j * num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j]; 380*9e82028bSJeremy L Thompson } 381*9e82028bSJeremy L Thompson } else { 382*9e82028bSJeremy L Thompson for (CeedInt i = 0; i < num_points; i++) { 383*9e82028bSJeremy L Thompson for (CeedInt j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] = uu[j * num_points + i + e_vec_offset]; 384*9e82028bSJeremy L Thompson } 385*9e82028bSJeremy L Thompson } 386*9e82028bSJeremy L Thompson e_vec_offset += num_points * num_comp; 387*9e82028bSJeremy L Thompson } 388*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 389*9e82028bSJeremy L Thompson } 390*9e82028bSJeremy L Thompson 391*9e82028bSJeremy L Thompson static inline int CeedElemRestrictionApply_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 392*9e82028bSJeremy L Thompson const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode, 393*9e82028bSJeremy L Thompson bool use_signs, bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) { 394*9e82028bSJeremy L Thompson CeedInt num_elem, elem_size, v_offset; 395*9e82028bSJeremy L Thompson CeedRestrictionType rstr_type; 396*9e82028bSJeremy L Thompson const CeedScalar *uu; 397*9e82028bSJeremy L Thompson CeedScalar *vv; 398*9e82028bSJeremy L Thompson 399*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 400*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 401*9e82028bSJeremy L Thompson v_offset = start * block_size * elem_size * num_comp; 402*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 403*9e82028bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 404*9e82028bSJeremy L Thompson 405*9e82028bSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 406*9e82028bSJeremy L Thompson // Sum into for transpose mode, E-vector to L-vector 407*9e82028bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 408*9e82028bSJeremy L Thompson } else { 409*9e82028bSJeremy L Thompson // Overwrite for notranspose mode, L-vector to E-vector 410*9e82028bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 411*9e82028bSJeremy L Thompson } 412*9e82028bSJeremy L Thompson 413*9e82028bSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 414*9e82028bSJeremy L Thompson // Restriction from E-vector to L-vector 415*9e82028bSJeremy L Thompson // Performing v += r^T * u 416*9e82028bSJeremy L Thompson // uu has shape [elem_size, num_comp, num_elem], row-major 417*9e82028bSJeremy L Thompson // vv has shape [nnodes, num_comp] 418*9e82028bSJeremy L Thompson // Sum into for transpose mode 419*9e82028bSJeremy L Thompson switch (rstr_type) { 420*9e82028bSJeremy L Thompson case CEED_RESTRICTION_STRIDED: 421*9e82028bSJeremy L Thompson CeedCallBackend( 422*9e82028bSJeremy L Thompson CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 423*9e82028bSJeremy L Thompson break; 424*9e82028bSJeremy L Thompson case CEED_RESTRICTION_STANDARD: 425*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 426*9e82028bSJeremy L Thompson elem_size, v_offset, uu, vv)); 427*9e82028bSJeremy L Thompson break; 428*9e82028bSJeremy L Thompson case CEED_RESTRICTION_ORIENTED: 429*9e82028bSJeremy L Thompson if (use_signs) { 430*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 431*9e82028bSJeremy L Thompson elem_size, v_offset, uu, vv)); 432*9e82028bSJeremy L Thompson } else { 433*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 434*9e82028bSJeremy L Thompson elem_size, v_offset, uu, vv)); 435*9e82028bSJeremy L Thompson } 436*9e82028bSJeremy L Thompson break; 437*9e82028bSJeremy L Thompson case CEED_RESTRICTION_CURL_ORIENTED: 438*9e82028bSJeremy L Thompson if (use_signs && use_orients) { 439*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 440*9e82028bSJeremy L Thompson elem_size, v_offset, uu, vv)); 441*9e82028bSJeremy L Thompson } else if (use_orients) { 442*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, 443*9e82028bSJeremy L Thompson num_elem, elem_size, v_offset, uu, vv)); 444*9e82028bSJeremy L Thompson } else { 445*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 446*9e82028bSJeremy L Thompson elem_size, v_offset, uu, vv)); 447*9e82028bSJeremy L Thompson } 448*9e82028bSJeremy L Thompson break; 449*9e82028bSJeremy L Thompson case CEED_RESTRICTION_POINTS: 450*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 451*9e82028bSJeremy L Thompson break; 452*9e82028bSJeremy L Thompson } 453*9e82028bSJeremy L Thompson } else { 454*9e82028bSJeremy L Thompson // Restriction from L-vector to E-vector 455*9e82028bSJeremy L Thompson // Perform: v = r * u 456*9e82028bSJeremy L Thompson // vv has shape [elem_size, num_comp, num_elem], row-major 457*9e82028bSJeremy L Thompson // uu has shape [nnodes, num_comp] 458*9e82028bSJeremy L Thompson // Overwrite for notranspose mode 459*9e82028bSJeremy L Thompson switch (rstr_type) { 460*9e82028bSJeremy L Thompson case CEED_RESTRICTION_STRIDED: 461*9e82028bSJeremy L Thompson CeedCallBackend( 462*9e82028bSJeremy L Thompson CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 463*9e82028bSJeremy L Thompson break; 464*9e82028bSJeremy L Thompson case CEED_RESTRICTION_STANDARD: 465*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 466*9e82028bSJeremy L Thompson elem_size, v_offset, uu, vv)); 467*9e82028bSJeremy L Thompson break; 468*9e82028bSJeremy L Thompson case CEED_RESTRICTION_ORIENTED: 469*9e82028bSJeremy L Thompson if (use_signs) { 470*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 471*9e82028bSJeremy L Thompson elem_size, v_offset, uu, vv)); 472*9e82028bSJeremy L Thompson } else { 473*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 474*9e82028bSJeremy L Thompson elem_size, v_offset, uu, vv)); 475*9e82028bSJeremy L Thompson } 476*9e82028bSJeremy L Thompson break; 477*9e82028bSJeremy L Thompson case CEED_RESTRICTION_CURL_ORIENTED: 478*9e82028bSJeremy L Thompson if (use_signs && use_orients) { 479*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, 480*9e82028bSJeremy L Thompson num_elem, elem_size, v_offset, uu, vv)); 481*9e82028bSJeremy L Thompson } else if (use_orients) { 482*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, 483*9e82028bSJeremy L Thompson num_elem, elem_size, v_offset, uu, vv)); 484*9e82028bSJeremy L Thompson } else { 485*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 486*9e82028bSJeremy L Thompson elem_size, v_offset, uu, vv)); 487*9e82028bSJeremy L Thompson } 488*9e82028bSJeremy L Thompson break; 489*9e82028bSJeremy L Thompson case CEED_RESTRICTION_POINTS: 490*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 491*9e82028bSJeremy L Thompson break; 492*9e82028bSJeremy L Thompson } 493*9e82028bSJeremy L Thompson } 494*9e82028bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 495*9e82028bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 496*9e82028bSJeremy L Thompson if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 497*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 498*9e82028bSJeremy L Thompson } 499*9e82028bSJeremy L Thompson 500*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 501*9e82028bSJeremy L Thompson // ElemRestriction Apply 502*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 503*9e82028bSJeremy L Thompson static int CeedElemRestrictionApply_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 504*9e82028bSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 505*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 506*9e82028bSJeremy L Thompson 507*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 508*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 509*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 510*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 511*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 512*9e82028bSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request)); 513*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 514*9e82028bSJeremy L Thompson } 515*9e82028bSJeremy L Thompson 516*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 517*9e82028bSJeremy L Thompson // ElemRestriction Apply Unsigned 518*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 519*9e82028bSJeremy L Thompson static int CeedElemRestrictionApplyUnsigned_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 520*9e82028bSJeremy L Thompson CeedRequest *request) { 521*9e82028bSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 522*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 523*9e82028bSJeremy L Thompson 524*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 525*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 526*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 527*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 528*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 529*9e82028bSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request)); 530*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 531*9e82028bSJeremy L Thompson } 532*9e82028bSJeremy L Thompson 533*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 534*9e82028bSJeremy L Thompson // ElemRestriction Apply Unoriented 535*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 536*9e82028bSJeremy L Thompson static int CeedElemRestrictionApplyUnoriented_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 537*9e82028bSJeremy L Thompson CeedRequest *request) { 538*9e82028bSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 539*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 540*9e82028bSJeremy L Thompson 541*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 542*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 543*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 544*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 545*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 546*9e82028bSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request)); 547*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 548*9e82028bSJeremy L Thompson } 549*9e82028bSJeremy L Thompson 550*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 551*9e82028bSJeremy L Thompson // ElemRestriction Apply Points 552*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 553*9e82028bSJeremy L Thompson static int CeedElemRestrictionApplyAtPointsInElement_Memcheck(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, 554*9e82028bSJeremy L Thompson CeedVector v, CeedRequest *request) { 555*9e82028bSJeremy L Thompson CeedInt num_comp; 556*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 557*9e82028bSJeremy L Thompson 558*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 559*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 560*9e82028bSJeremy L Thompson return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request); 561*9e82028bSJeremy L Thompson } 562*9e82028bSJeremy L Thompson 563*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 564*9e82028bSJeremy L Thompson // ElemRestriction Apply Block 565*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 566*9e82028bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Memcheck(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 567*9e82028bSJeremy L Thompson CeedRequest *request) { 568*9e82028bSJeremy L Thompson CeedInt block_size, num_comp, comp_stride; 569*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 570*9e82028bSJeremy L Thompson 571*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 572*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 573*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 574*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 575*9e82028bSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request)); 576*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 577*9e82028bSJeremy L Thompson } 578*9e82028bSJeremy L Thompson 579*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 580*9e82028bSJeremy L Thompson // ElemRestriction Get Offsets 581*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 582*9e82028bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 583*9e82028bSJeremy L Thompson Ceed ceed; 584*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 585*9e82028bSJeremy L Thompson 586*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 587*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 588*9e82028bSJeremy L Thompson 589*9e82028bSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 590*9e82028bSJeremy L Thompson 591*9e82028bSJeremy L Thompson *offsets = impl->offsets; 592*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 593*9e82028bSJeremy L Thompson } 594*9e82028bSJeremy L Thompson 595*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 596*9e82028bSJeremy L Thompson // ElemRestriction Get Orientations 597*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 598*9e82028bSJeremy L Thompson static int CeedElemRestrictionGetOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 599*9e82028bSJeremy L Thompson Ceed ceed; 600*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 601*9e82028bSJeremy L Thompson 602*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 603*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 604*9e82028bSJeremy L Thompson 605*9e82028bSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 606*9e82028bSJeremy L Thompson 607*9e82028bSJeremy L Thompson *orients = impl->orients; 608*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 609*9e82028bSJeremy L Thompson } 610*9e82028bSJeremy L Thompson 611*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 612*9e82028bSJeremy L Thompson // ElemRestriction Get Curl-Conforming Orientations 613*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 614*9e82028bSJeremy L Thompson static int CeedElemRestrictionGetCurlOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 615*9e82028bSJeremy L Thompson Ceed ceed; 616*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 617*9e82028bSJeremy L Thompson 618*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 619*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 620*9e82028bSJeremy L Thompson 621*9e82028bSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 622*9e82028bSJeremy L Thompson 623*9e82028bSJeremy L Thompson *curl_orients = impl->curl_orients; 624*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 625*9e82028bSJeremy L Thompson } 626*9e82028bSJeremy L Thompson 627*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 628*9e82028bSJeremy L Thompson // ElemRestriction Destroy 629*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 630*9e82028bSJeremy L Thompson static int CeedElemRestrictionDestroy_Memcheck(CeedElemRestriction rstr) { 631*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 632*9e82028bSJeremy L Thompson 633*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 634*9e82028bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->offsets_allocated)); 635*9e82028bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->orients_allocated)); 636*9e82028bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 637*9e82028bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 638*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 639*9e82028bSJeremy L Thompson } 640*9e82028bSJeremy L Thompson 641*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 642*9e82028bSJeremy L Thompson // ElemRestriction Create 643*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 644*9e82028bSJeremy L Thompson int CeedElemRestrictionCreate_Memcheck(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 645*9e82028bSJeremy L Thompson const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 646*9e82028bSJeremy L Thompson Ceed ceed; 647*9e82028bSJeremy L Thompson CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets; 648*9e82028bSJeremy L Thompson CeedRestrictionType rstr_type; 649*9e82028bSJeremy L Thompson CeedElemRestriction_Memcheck *impl; 650*9e82028bSJeremy L Thompson 651*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 652*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 653*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 654*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 655*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 656*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 657*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 658*9e82028bSJeremy L Thompson CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 659*9e82028bSJeremy L Thompson 660*9e82028bSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 661*9e82028bSJeremy L Thompson 662*9e82028bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 663*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 664*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 665*9e82028bSJeremy L Thompson 666*9e82028bSJeremy L Thompson // Offsets data 667*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 668*9e82028bSJeremy L Thompson if (rstr_type != CEED_RESTRICTION_STRIDED) { 669*9e82028bSJeremy L Thompson const char *resource; 670*9e82028bSJeremy L Thompson 671*9e82028bSJeremy L Thompson // Check indices for ref or memcheck backends 672*9e82028bSJeremy L Thompson { 673*9e82028bSJeremy L Thompson Ceed current = ceed, parent = NULL; 674*9e82028bSJeremy L Thompson 675*9e82028bSJeremy L Thompson CeedCallBackend(CeedGetParent(current, &parent)); 676*9e82028bSJeremy L Thompson while (current != parent) { 677*9e82028bSJeremy L Thompson current = parent; 678*9e82028bSJeremy L Thompson CeedCallBackend(CeedGetParent(current, &parent)); 679*9e82028bSJeremy L Thompson } 680*9e82028bSJeremy L Thompson CeedCallBackend(CeedGetResource(parent, &resource)); 681*9e82028bSJeremy L Thompson } 682*9e82028bSJeremy L Thompson if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 683*9e82028bSJeremy L Thompson !strcmp(resource, "/cpu/self/memcheck/blocked")) { 684*9e82028bSJeremy L Thompson CeedSize l_size; 685*9e82028bSJeremy L Thompson 686*9e82028bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 687*9e82028bSJeremy L Thompson for (CeedInt i = 0; i < num_elem * elem_size; i++) { 688*9e82028bSJeremy L Thompson CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 689*9e82028bSJeremy L Thompson "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 690*9e82028bSJeremy L Thompson } 691*9e82028bSJeremy L Thompson } 692*9e82028bSJeremy L Thompson 693*9e82028bSJeremy L Thompson // Copy data 694*9e82028bSJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points)); 695*9e82028bSJeremy L Thompson num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size); 696*9e82028bSJeremy L Thompson switch (copy_mode) { 697*9e82028bSJeremy L Thompson case CEED_COPY_VALUES: 698*9e82028bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated)); 699*9e82028bSJeremy L Thompson memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0])); 700*9e82028bSJeremy L Thompson impl->offsets = impl->offsets_allocated; 701*9e82028bSJeremy L Thompson break; 702*9e82028bSJeremy L Thompson case CEED_OWN_POINTER: 703*9e82028bSJeremy L Thompson impl->offsets_allocated = (CeedInt *)offsets; 704*9e82028bSJeremy L Thompson impl->offsets = impl->offsets_allocated; 705*9e82028bSJeremy L Thompson break; 706*9e82028bSJeremy L Thompson case CEED_USE_POINTER: 707*9e82028bSJeremy L Thompson impl->offsets = offsets; 708*9e82028bSJeremy L Thompson } 709*9e82028bSJeremy L Thompson 710*9e82028bSJeremy L Thompson // Orientation data 711*9e82028bSJeremy L Thompson if (rstr_type == CEED_RESTRICTION_ORIENTED) { 712*9e82028bSJeremy L Thompson CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 713*9e82028bSJeremy L Thompson switch (copy_mode) { 714*9e82028bSJeremy L Thompson case CEED_COPY_VALUES: 715*9e82028bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated)); 716*9e82028bSJeremy L Thompson memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0])); 717*9e82028bSJeremy L Thompson impl->orients = impl->orients_allocated; 718*9e82028bSJeremy L Thompson break; 719*9e82028bSJeremy L Thompson case CEED_OWN_POINTER: 720*9e82028bSJeremy L Thompson impl->orients_allocated = (bool *)orients; 721*9e82028bSJeremy L Thompson impl->orients = impl->orients_allocated; 722*9e82028bSJeremy L Thompson break; 723*9e82028bSJeremy L Thompson case CEED_USE_POINTER: 724*9e82028bSJeremy L Thompson impl->orients = orients; 725*9e82028bSJeremy L Thompson } 726*9e82028bSJeremy L Thompson } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 727*9e82028bSJeremy L Thompson CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 728*9e82028bSJeremy L Thompson switch (copy_mode) { 729*9e82028bSJeremy L Thompson case CEED_COPY_VALUES: 730*9e82028bSJeremy L Thompson CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated)); 731*9e82028bSJeremy L Thompson memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0])); 732*9e82028bSJeremy L Thompson impl->curl_orients = impl->curl_orients_allocated; 733*9e82028bSJeremy L Thompson break; 734*9e82028bSJeremy L Thompson case CEED_OWN_POINTER: 735*9e82028bSJeremy L Thompson impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 736*9e82028bSJeremy L Thompson impl->curl_orients = impl->curl_orients_allocated; 737*9e82028bSJeremy L Thompson break; 738*9e82028bSJeremy L Thompson case CEED_USE_POINTER: 739*9e82028bSJeremy L Thompson impl->curl_orients = curl_orients; 740*9e82028bSJeremy L Thompson } 741*9e82028bSJeremy L Thompson } 742*9e82028bSJeremy L Thompson } 743*9e82028bSJeremy L Thompson 744*9e82028bSJeremy L Thompson // Set apply function 745*9e82028bSJeremy L Thompson impl->Apply = CeedElemRestrictionApply_Memcheck_Core; 746*9e82028bSJeremy L Thompson 747*9e82028bSJeremy L Thompson // Register backend functions 748*9e82028bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Memcheck)); 749*9e82028bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Memcheck)); 750*9e82028bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Memcheck)); 751*9e82028bSJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 752*9e82028bSJeremy L Thompson CeedCallBackend( 753*9e82028bSJeremy L Thompson CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Memcheck)); 754*9e82028bSJeremy L Thompson } 755*9e82028bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Memcheck)); 756*9e82028bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Memcheck)); 757*9e82028bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Memcheck)); 758*9e82028bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Memcheck)); 759*9e82028bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Memcheck)); 760*9e82028bSJeremy L Thompson return CEED_ERROR_SUCCESS; 761*9e82028bSJeremy L Thompson } 762*9e82028bSJeremy L Thompson 763*9e82028bSJeremy L Thompson //------------------------------------------------------------------------------ 764