13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 321617c04Sjeremylt // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 521617c04Sjeremylt // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 721617c04Sjeremylt 849aac155SJeremy L Thompson #include <ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 103d576824SJeremy L Thompson #include <stdbool.h> 11fcbe8c06SSebastian Grimberg #include <stdlib.h> 123d576824SJeremy L Thompson #include <string.h> 132b730f8bSJeremy L Thompson 1421617c04Sjeremylt #include "ceed-ref.h" 1521617c04Sjeremylt 16f10650afSjeremylt //------------------------------------------------------------------------------ 17f10650afSjeremylt // Core ElemRestriction Apply Code 18f10650afSjeremylt //------------------------------------------------------------------------------ 191cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 2094648b7dSSebastian Grimberg CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 21*171d97d0SJeremy L Thompson CeedSize v_offset, const CeedScalar *__restrict__ uu, 22eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 2394648b7dSSebastian Grimberg // No offsets provided, identity restriction 24d1d35e2fSjeremylt bool has_backend_strides; 25ad70ee2cSJeremy L Thompson 261cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 27d1d35e2fSjeremylt if (has_backend_strides) { 28d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 297f90ec76Sjeremylt // This if branch is left separate to allow better inlining 30ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 312b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 322b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 33ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 34*171d97d0SJeremy L Thompson vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 35*171d97d0SJeremy L Thompson uu[n + k * (CeedSize)elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * (CeedSize)num_comp]; 362b730f8bSJeremy L Thompson } 372b730f8bSJeremy L Thompson } 382b730f8bSJeremy L Thompson } 392b730f8bSJeremy L Thompson } 407f90ec76Sjeremylt } else { 417f90ec76Sjeremylt // User provided strides 427f90ec76Sjeremylt CeedInt strides[3]; 43ad70ee2cSJeremy L Thompson 4456c48462SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 45ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 462b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 472b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 48ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 49*171d97d0SJeremy L Thompson vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 50*171d97d0SJeremy L Thompson uu[n * (CeedSize)strides[0] + k * (CeedSize)strides[1] + CeedIntMin(e + j, num_elem - 1) * (CeedSize)strides[2]]; 512b730f8bSJeremy L Thompson } 522b730f8bSJeremy L Thompson } 532b730f8bSJeremy L Thompson } 542b730f8bSJeremy L Thompson } 557509a596Sjeremylt } 5694648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 5794648b7dSSebastian Grimberg } 5894648b7dSSebastian Grimberg 59eda0adbcSSebastian Grimberg static inline int CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 6094648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 61*171d97d0SJeremy L Thompson CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 62eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 63fcbe8c06SSebastian Grimberg // Default restriction with offsets 6494648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 65ad70ee2cSJeremy L Thompson 661cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 67ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 682b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 69ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 70*171d97d0SJeremy L Thompson vv[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 71fcbe8c06SSebastian Grimberg } 72fcbe8c06SSebastian Grimberg } 73fcbe8c06SSebastian Grimberg } 7494648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 7594648b7dSSebastian Grimberg } 7694648b7dSSebastian Grimberg 771cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 7894648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 79*171d97d0SJeremy L Thompson CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 80eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 81fcbe8c06SSebastian Grimberg // Restriction with orientations 8294648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 83ad70ee2cSJeremy L Thompson 841cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 85ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 86fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 87ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 88*171d97d0SJeremy L Thompson vv[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + i - v_offset] = 897c1dbaffSSebastian Grimberg uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 90fcbe8c06SSebastian Grimberg } 91fcbe8c06SSebastian Grimberg } 92fcbe8c06SSebastian Grimberg } 9394648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 9494648b7dSSebastian Grimberg } 9594648b7dSSebastian Grimberg 961cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 9794648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 98*171d97d0SJeremy L Thompson CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 99eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 10077d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 10194648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 102ad70ee2cSJeremy L Thompson 1031cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 104ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 105fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1060c73c039SSebastian Grimberg CeedInt n = 0; 1075c7e0f51SSebastian Grimberg 108ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 109*171d97d0SJeremy L Thompson vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 110ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 111ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 112ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 113ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 1140c73c039SSebastian Grimberg } 1155c7e0f51SSebastian Grimberg CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { 116ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 117*171d97d0SJeremy L Thompson vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 118ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 119ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 120ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 121ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 122ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 123ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 1240c73c039SSebastian Grimberg } 1250c73c039SSebastian Grimberg } 126ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 127*171d97d0SJeremy L Thompson vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 128ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 129ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 130ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 131ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 1322b730f8bSJeremy L Thompson } 1332b730f8bSJeremy L Thompson } 1342b730f8bSJeremy L Thompson } 1350c73c039SSebastian Grimberg return CEED_ERROR_SUCCESS; 136fcbe8c06SSebastian Grimberg } 1370c73c039SSebastian Grimberg 1381cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 139ad70ee2cSJeremy L Thompson const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 14094648b7dSSebastian Grimberg CeedInt stop, CeedInt num_elem, CeedInt elem_size, 141*171d97d0SJeremy L Thompson CeedSize v_offset, const CeedScalar *__restrict__ uu, 142eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 14394648b7dSSebastian Grimberg // Restriction with (unsigned) tridiagonal transformation 1440c73c039SSebastian Grimberg CeedElemRestriction_Ref *impl; 145ad70ee2cSJeremy L Thompson 1461cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 147ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 1487c1dbaffSSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1497c1dbaffSSebastian Grimberg CeedInt n = 0; 150ad70ee2cSJeremy L Thompson 151ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 152*171d97d0SJeremy L Thompson vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 153ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 154ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 155ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 156ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 1577c1dbaffSSebastian Grimberg } 1585c7e0f51SSebastian Grimberg CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { 159ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 160*171d97d0SJeremy L Thompson vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 161ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 162ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 163ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 164ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 165ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 166ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 1677c1dbaffSSebastian Grimberg } 1687c1dbaffSSebastian Grimberg } 169ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 170*171d97d0SJeremy L Thompson vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 171ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 172ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 173ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 174ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 1757c1dbaffSSebastian Grimberg } 1767c1dbaffSSebastian Grimberg } 1777c1dbaffSSebastian Grimberg } 1787c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 1797c1dbaffSSebastian Grimberg } 1807c1dbaffSSebastian Grimberg 1811cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 18294648b7dSSebastian Grimberg CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 183*171d97d0SJeremy L Thompson CeedSize v_offset, const CeedScalar *__restrict__ uu, 184eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 18594648b7dSSebastian Grimberg // No offsets provided, identity restriction 186d1d35e2fSjeremylt bool has_backend_strides; 187ad70ee2cSJeremy L Thompson 1881cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 189d1d35e2fSjeremylt if (has_backend_strides) { 190d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1917f90ec76Sjeremylt // This if brach is left separate to allow better inlining 192ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 1932b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1942b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 195ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 196*171d97d0SJeremy L Thompson vv[n + k * (CeedSize)elem_size + (e + j) * elem_size * (CeedSize)num_comp] += 197*171d97d0SJeremy L Thompson uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset]; 1982b730f8bSJeremy L Thompson } 1992b730f8bSJeremy L Thompson } 2002b730f8bSJeremy L Thompson } 2012b730f8bSJeremy L Thompson } 2027f90ec76Sjeremylt } else { 2037f90ec76Sjeremylt // User provided strides 2047f90ec76Sjeremylt CeedInt strides[3]; 205ad70ee2cSJeremy L Thompson 20656c48462SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 207ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 2082b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 2092b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 210ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 211*171d97d0SJeremy L Thompson vv[n * (CeedSize)strides[0] + k * (CeedSize)strides[1] + (e + j) * (CeedSize)strides[2]] += 212*171d97d0SJeremy L Thompson uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset]; 2132b730f8bSJeremy L Thompson } 2142b730f8bSJeremy L Thompson } 2152b730f8bSJeremy L Thompson } 2162b730f8bSJeremy L Thompson } 217523b8ea0Sjeremylt } 21894648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 21994648b7dSSebastian Grimberg } 22094648b7dSSebastian Grimberg 221eda0adbcSSebastian Grimberg static inline int CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 22294648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 223*171d97d0SJeremy L Thompson CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 224eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 225fcbe8c06SSebastian Grimberg // Default restriction with offsets 22694648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 227ad70ee2cSJeremy L Thompson 2281cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 229ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 2302b730f8bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 231ad70ee2cSJeremy L Thompson for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 2328d94b059Sjeremylt // Iteration bound set to discard padding elements 233ad70ee2cSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 2345c7e0f51SSebastian Grimberg CeedScalar vv_loc; 23558c07c4fSSebastian Grimberg 236*171d97d0SJeremy L Thompson vv_loc = uu[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + j - v_offset]; 2375c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 238fcbe8c06SSebastian Grimberg } 239fcbe8c06SSebastian Grimberg } 240fcbe8c06SSebastian Grimberg } 241fcbe8c06SSebastian Grimberg } 24294648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 24394648b7dSSebastian Grimberg } 24494648b7dSSebastian Grimberg 2451cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 24694648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 247*171d97d0SJeremy L Thompson CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 248eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 249fcbe8c06SSebastian Grimberg // Restriction with orientations 25094648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 251ad70ee2cSJeremy L Thompson 2521cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 253ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 254fcbe8c06SSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 255ad70ee2cSJeremy L Thompson for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 256fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 257ad70ee2cSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 2585c7e0f51SSebastian Grimberg CeedScalar vv_loc; 25958c07c4fSSebastian Grimberg 260*171d97d0SJeremy L Thompson vv_loc = 261*171d97d0SJeremy L Thompson uu[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 2625c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 263fcbe8c06SSebastian Grimberg } 264fcbe8c06SSebastian Grimberg } 265fcbe8c06SSebastian Grimberg } 266fcbe8c06SSebastian Grimberg } 26794648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 26894648b7dSSebastian Grimberg } 26994648b7dSSebastian Grimberg 2701cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 27194648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 272*171d97d0SJeremy L Thompson CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 273eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 27477d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 27594648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 2765c7e0f51SSebastian Grimberg CeedScalar vv_loc[block_size]; 277ad70ee2cSJeremy L Thompson 2781cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 279ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 280fcbe8c06SSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 281fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 28258c07c4fSSebastian Grimberg const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 28358c07c4fSSebastian Grimberg CeedInt n = 0; 28458c07c4fSSebastian Grimberg 2855c7e0f51SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 286*171d97d0SJeremy L Thompson vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 287ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 288*171d97d0SJeremy L Thompson uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 289ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 2905c7e0f51SSebastian Grimberg } 2915c7e0f51SSebastian Grimberg for (CeedInt j = 0; j < block_end; j++) { 2925c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 2930c73c039SSebastian Grimberg } 2940c73c039SSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 2955c7e0f51SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 296*171d97d0SJeremy L Thompson vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 297ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 298*171d97d0SJeremy L Thompson uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 299ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 300*171d97d0SJeremy L Thompson uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 301ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 3020c73c039SSebastian Grimberg } 303ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 3045c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 3055c7e0f51SSebastian Grimberg } 3065c7e0f51SSebastian Grimberg } 3075c7e0f51SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 308*171d97d0SJeremy L Thompson vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 309ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 310*171d97d0SJeremy L Thompson uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 311ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 3125c7e0f51SSebastian Grimberg } 3135c7e0f51SSebastian Grimberg for (CeedInt j = 0; j < block_end; j++) { 3145c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 31521617c04Sjeremylt } 316b435c5a6Srezgarshakeri } 3172b730f8bSJeremy L Thompson } 318e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 31921617c04Sjeremylt } 32021617c04Sjeremylt 3211cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 322ad70ee2cSJeremy L Thompson const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 323*171d97d0SJeremy L Thompson CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset, 324eda0adbcSSebastian Grimberg const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 32594648b7dSSebastian Grimberg // Restriction with (unsigned) tridiagonal transformation 3267c1dbaffSSebastian Grimberg CeedElemRestriction_Ref *impl; 3275c7e0f51SSebastian Grimberg CeedScalar vv_loc[block_size]; 328ad70ee2cSJeremy L Thompson 3291cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 330ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 3317c1dbaffSSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 3327c1dbaffSSebastian Grimberg // Iteration bound set to discard padding elements 333ad70ee2cSJeremy L Thompson const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 33458c07c4fSSebastian Grimberg CeedInt n = 0; 335ad70ee2cSJeremy L Thompson 3365c7e0f51SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 337*171d97d0SJeremy L Thompson vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 338ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 339*171d97d0SJeremy L Thompson uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 340ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 3415c7e0f51SSebastian Grimberg } 3425c7e0f51SSebastian Grimberg for (CeedInt j = 0; j < block_end; j++) { 3435c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 3447c1dbaffSSebastian Grimberg } 3457c1dbaffSSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 3465c7e0f51SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 347*171d97d0SJeremy L Thompson vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 348ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 349*171d97d0SJeremy L Thompson uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 350ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 351*171d97d0SJeremy L Thompson uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 352ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 3537c1dbaffSSebastian Grimberg } 354ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 3555c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 3565c7e0f51SSebastian Grimberg } 3575c7e0f51SSebastian Grimberg } 3585c7e0f51SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 359*171d97d0SJeremy L Thompson vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 360ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 361*171d97d0SJeremy L Thompson uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 362ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 3635c7e0f51SSebastian Grimberg } 3645c7e0f51SSebastian Grimberg for (CeedInt j = 0; j < block_end; j++) { 3655c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 3667c1dbaffSSebastian Grimberg } 3677c1dbaffSSebastian Grimberg } 3687c1dbaffSSebastian Grimberg } 3697c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 3707c1dbaffSSebastian Grimberg } 3717c1dbaffSSebastian Grimberg 3721249ccc5SJeremy L Thompson static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, CeedInt stop, 373eda0adbcSSebastian Grimberg CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu, 374eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 375*171d97d0SJeremy L Thompson CeedInt num_points, l_vec_offset; 376*171d97d0SJeremy L Thompson CeedSize e_vec_offset = 0; 37705fa913cSJeremy L Thompson CeedElemRestriction_Ref *impl; 37805fa913cSJeremy L Thompson 37905fa913cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 38005fa913cSJeremy L Thompson for (CeedInt e = start; e < stop; e++) { 3810930e4e7SJeremy L Thompson l_vec_offset = impl->offsets[e]; 38205fa913cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points)); 38305fa913cSJeremy L Thompson if (t_mode == CEED_NOTRANSPOSE) { 38405fa913cSJeremy L Thompson for (CeedInt i = 0; i < num_points; i++) { 385*171d97d0SJeremy L Thompson for (CeedInt j = 0; j < num_comp; j++) vv[j * (CeedSize)num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j]; 38605fa913cSJeremy L Thompson } 38705fa913cSJeremy L Thompson } else { 38805fa913cSJeremy L Thompson for (CeedInt i = 0; i < num_points; i++) { 389*171d97d0SJeremy L Thompson for (CeedInt j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] = uu[j * (CeedSize)num_points + i + e_vec_offset]; 39005fa913cSJeremy L Thompson } 39105fa913cSJeremy L Thompson } 392*171d97d0SJeremy L Thompson e_vec_offset += num_points * (CeedSize)num_comp; 39305fa913cSJeremy L Thompson } 39405fa913cSJeremy L Thompson return CEED_ERROR_SUCCESS; 39505fa913cSJeremy L Thompson } 39605fa913cSJeremy L Thompson 3971cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 398ad70ee2cSJeremy L Thompson const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, 399ad70ee2cSJeremy L Thompson bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) { 400*171d97d0SJeremy L Thompson CeedInt num_elem, elem_size; 401*171d97d0SJeremy L Thompson CeedSize v_offset = 0; 402ad70ee2cSJeremy L Thompson CeedRestrictionType rstr_type; 4037c1dbaffSSebastian Grimberg const CeedScalar *uu; 4047c1dbaffSSebastian Grimberg CeedScalar *vv; 405ad70ee2cSJeremy L Thompson 4061cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 4071cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 408*171d97d0SJeremy L Thompson v_offset = start * block_size * elem_size * (CeedSize)num_comp; 4091cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 41094648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 411ad70ee2cSJeremy L Thompson 41294648b7dSSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 41394648b7dSSebastian Grimberg // Sum into for transpose mode, E-vector to L-vector 41494648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 41594648b7dSSebastian Grimberg } else { 41694648b7dSSebastian Grimberg // Overwrite for notranspose mode, L-vector to E-vector 41794648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 41894648b7dSSebastian Grimberg } 41994648b7dSSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 4207c1dbaffSSebastian Grimberg // Restriction from E-vector to L-vector 4217c1dbaffSSebastian Grimberg // Performing v += r^T * u 4227c1dbaffSSebastian Grimberg // uu has shape [elem_size, num_comp, num_elem], row-major 4237c1dbaffSSebastian Grimberg // vv has shape [nnodes, num_comp] 4247c1dbaffSSebastian Grimberg // Sum into for transpose mode 4257c1dbaffSSebastian Grimberg switch (rstr_type) { 4267c1dbaffSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 4275d10938bSJeremy L Thompson CeedCallBackend( 4281cc2c60dSJeremy L Thompson CeedElemRestrictionApplyStridedTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 42994648b7dSSebastian Grimberg break; 43061a27d74SSebastian Grimberg case CEED_RESTRICTION_STANDARD: 431eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 4325d10938bSJeremy L Thompson v_offset, uu, vv)); 43394648b7dSSebastian Grimberg break; 4347c1dbaffSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 43594648b7dSSebastian Grimberg if (use_signs) { 4361cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4371cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 43894648b7dSSebastian Grimberg } else { 439eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 440eda0adbcSSebastian Grimberg v_offset, uu, vv)); 44194648b7dSSebastian Grimberg } 44294648b7dSSebastian Grimberg break; 44394648b7dSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: 44494648b7dSSebastian Grimberg if (use_signs && use_orients) { 4451cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4465d10938bSJeremy L Thompson elem_size, v_offset, uu, vv)); 44794648b7dSSebastian Grimberg } else if (use_orients) { 4481cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 4491cc2c60dSJeremy L Thompson num_elem, elem_size, v_offset, uu, vv)); 45094648b7dSSebastian Grimberg } else { 451eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 452eda0adbcSSebastian Grimberg v_offset, uu, vv)); 45394648b7dSSebastian Grimberg } 45494648b7dSSebastian Grimberg break; 4552c7e7413SJeremy L Thompson case CEED_RESTRICTION_POINTS: 4561249ccc5SJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 4572c7e7413SJeremy L Thompson break; 45894648b7dSSebastian Grimberg } 45994648b7dSSebastian Grimberg } else { 46094648b7dSSebastian Grimberg // Restriction from L-vector to E-vector 46194648b7dSSebastian Grimberg // Perform: v = r * u 46294648b7dSSebastian Grimberg // vv has shape [elem_size, num_comp, num_elem], row-major 46394648b7dSSebastian Grimberg // uu has shape [nnodes, num_comp] 46494648b7dSSebastian Grimberg // Overwrite for notranspose mode 46594648b7dSSebastian Grimberg switch (rstr_type) { 46694648b7dSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 4675d10938bSJeremy L Thompson CeedCallBackend( 4681cc2c60dSJeremy L Thompson CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 46994648b7dSSebastian Grimberg break; 47061a27d74SSebastian Grimberg case CEED_RESTRICTION_STANDARD: 471eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 472eda0adbcSSebastian Grimberg v_offset, uu, vv)); 47394648b7dSSebastian Grimberg break; 47494648b7dSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 47594648b7dSSebastian Grimberg if (use_signs) { 4761cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4771cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 47894648b7dSSebastian Grimberg } else { 479eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4801cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 48194648b7dSSebastian Grimberg } 48294648b7dSSebastian Grimberg break; 48394648b7dSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: 48494648b7dSSebastian Grimberg if (use_signs && use_orients) { 4851cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4865d10938bSJeremy L Thompson elem_size, v_offset, uu, vv)); 48794648b7dSSebastian Grimberg } else if (use_orients) { 4881cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 4895d10938bSJeremy L Thompson num_elem, elem_size, v_offset, uu, vv)); 49094648b7dSSebastian Grimberg } else { 491eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4921cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 49394648b7dSSebastian Grimberg } 49494648b7dSSebastian Grimberg break; 4952c7e7413SJeremy L Thompson case CEED_RESTRICTION_POINTS: 4961249ccc5SJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 4972c7e7413SJeremy L Thompson break; 49894648b7dSSebastian Grimberg } 4997c1dbaffSSebastian Grimberg } 5007c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 5017c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 5027c1dbaffSSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 5037c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 5047c1dbaffSSebastian Grimberg } 5057c1dbaffSSebastian Grimberg 5067c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 507f10650afSjeremylt // ElemRestriction Apply - Common Sizes 508f10650afSjeremylt //------------------------------------------------------------------------------ 5091cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5107c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5117c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5121cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 513d979a051Sjeremylt } 514d979a051Sjeremylt 5151cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5167c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5177c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5181cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5194d2a38eeSjeremylt } 5204d2a38eeSjeremylt 5211cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5227c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5237c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5241cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 5259c36149bSjeremylt } 5269c36149bSjeremylt 5271cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5287c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5297c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5301cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5319c36149bSjeremylt } 5329c36149bSjeremylt 5331cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5347c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5357c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5361cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 537d979a051Sjeremylt } 538d979a051Sjeremylt 5391cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5407c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5417c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5421cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 543d979a051Sjeremylt } 544d979a051Sjeremylt 5451cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5467c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5477c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5481cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 549d979a051Sjeremylt } 550d979a051Sjeremylt 5511cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5527c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5537c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5541cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 555d979a051Sjeremylt } 556d979a051Sjeremylt 557bf4d1581Sjeremylt // LCOV_EXCL_START 5581cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5597c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5607c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5611cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 562d979a051Sjeremylt } 563bf4d1581Sjeremylt // LCOV_EXCL_STOP 564d979a051Sjeremylt 5651cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5667c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5677c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5681cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 569d979a051Sjeremylt } 570d979a051Sjeremylt 571bf4d1581Sjeremylt // LCOV_EXCL_START 5721cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5737c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5747c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5751cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 576d979a051Sjeremylt } 577bf4d1581Sjeremylt // LCOV_EXCL_STOP 578d979a051Sjeremylt 5791cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5807c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5810c73c039SSebastian Grimberg CeedVector v, CeedRequest *request) { 5821cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5834d2a38eeSjeremylt } 5844d2a38eeSjeremylt 585f10650afSjeremylt //------------------------------------------------------------------------------ 586f10650afSjeremylt // ElemRestriction Apply 587f10650afSjeremylt //------------------------------------------------------------------------------ 5881cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 589ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 590ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 591ad70ee2cSJeremy L Thompson 5921cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 5931cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 5941cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 5951cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 5961cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 5971cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request)); 5985d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 599f30b1135SSebastian Grimberg } 600f30b1135SSebastian Grimberg 601f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 602f30b1135SSebastian Grimberg // ElemRestriction Apply Unsigned 603f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 6041cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 6051cc2c60dSJeremy L Thompson CeedRequest *request) { 606ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 607ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 608ad70ee2cSJeremy L Thompson 6091cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 6101cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6111cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6121cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6131cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6141cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request)); 6155d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6167c1dbaffSSebastian Grimberg } 6177c1dbaffSSebastian Grimberg 6187c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 6197c1dbaffSSebastian Grimberg // ElemRestriction Apply Unoriented 6207c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 6211cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 6221cc2c60dSJeremy L Thompson CeedRequest *request) { 623ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 624ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 625ad70ee2cSJeremy L Thompson 6261cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 6271cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6281cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6291cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6301cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6311cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request)); 6325d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6339c36149bSjeremylt } 634be9261b7Sjeremylt 635f10650afSjeremylt //------------------------------------------------------------------------------ 6362c7e7413SJeremy L Thompson // ElemRestriction Apply Points 6372c7e7413SJeremy L Thompson //------------------------------------------------------------------------------ 638eda0adbcSSebastian Grimberg static int CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 6392c7e7413SJeremy L Thompson CeedRequest *request) { 64005fa913cSJeremy L Thompson CeedInt num_comp; 6412c7e7413SJeremy L Thompson CeedElemRestriction_Ref *impl; 6422c7e7413SJeremy L Thompson 643eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 644eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 645eda0adbcSSebastian Grimberg return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request); 6462c7e7413SJeremy L Thompson } 6472c7e7413SJeremy L Thompson 6482c7e7413SJeremy L Thompson //------------------------------------------------------------------------------ 649f10650afSjeremylt // ElemRestriction Apply Block 650f10650afSjeremylt //------------------------------------------------------------------------------ 6511cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 652074cb416Sjeremylt CeedRequest *request) { 653ad70ee2cSJeremy L Thompson CeedInt block_size, num_comp, comp_stride; 654ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 655ad70ee2cSJeremy L Thompson 6561cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6571cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6581cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6591cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6601cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request)); 6615d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6629c36149bSjeremylt } 663be9261b7Sjeremylt 664f10650afSjeremylt //------------------------------------------------------------------------------ 665bd33150aSjeremylt // ElemRestriction Get Offsets 666bd33150aSjeremylt //------------------------------------------------------------------------------ 6672b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 668ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 669ad70ee2cSJeremy L Thompson 670ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 671bd33150aSjeremylt 6726e536b99SJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 673bd33150aSjeremylt 674bd33150aSjeremylt *offsets = impl->offsets; 675e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 676bd33150aSjeremylt } 677bd33150aSjeremylt 678bd33150aSjeremylt //------------------------------------------------------------------------------ 67977d1c127SSebastian Grimberg // ElemRestriction Get Orientations 68077d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 68177d1c127SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 682ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 683ad70ee2cSJeremy L Thompson 684ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 68577d1c127SSebastian Grimberg 6866e536b99SJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 68777d1c127SSebastian Grimberg 68877d1c127SSebastian Grimberg *orients = impl->orients; 68977d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 69077d1c127SSebastian Grimberg } 69177d1c127SSebastian Grimberg 69277d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 69377d1c127SSebastian Grimberg // ElemRestriction Get Curl-Conforming Orientations 69477d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 6950c73c039SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 696ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 697ad70ee2cSJeremy L Thompson 698ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 69977d1c127SSebastian Grimberg 7006e536b99SJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 70177d1c127SSebastian Grimberg 70277d1c127SSebastian Grimberg *curl_orients = impl->curl_orients; 70377d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 70477d1c127SSebastian Grimberg } 70577d1c127SSebastian Grimberg 70677d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 707f10650afSjeremylt // ElemRestriction Destroy 708f10650afSjeremylt //------------------------------------------------------------------------------ 7091cc2c60dSJeremy L Thompson static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr) { 710fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 71121617c04Sjeremylt 7121cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 7132b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->offsets_allocated)); 71477d1c127SSebastian Grimberg CeedCallBackend(CeedFree(&impl->orients_allocated)); 71577d1c127SSebastian Grimberg CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 7162b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 717e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 71821617c04Sjeremylt } 71921617c04Sjeremylt 720f10650afSjeremylt //------------------------------------------------------------------------------ 721f10650afSjeremylt // ElemRestriction Create 722f10650afSjeremylt //------------------------------------------------------------------------------ 723fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 7241cc2c60dSJeremy L Thompson const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 725ad70ee2cSJeremy L Thompson Ceed ceed; 72607d5dec1SJeremy L Thompson CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets; 727ad70ee2cSJeremy L Thompson CeedRestrictionType rstr_type; 72821617c04Sjeremylt CeedElemRestriction_Ref *impl; 729ad70ee2cSJeremy L Thompson 7301cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 7311cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 7321cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 7331cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 7341cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 7351cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 7361cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 73722eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 73821617c04Sjeremylt 7396574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 740dce49693SSebastian Grimberg 7412b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 742dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 74322eb1385SJeremy L Thompson 74422eb1385SJeremy L Thompson // Set layouts 74522eb1385SJeremy L Thompson { 74622eb1385SJeremy L Thompson bool has_backend_strides; 74722eb1385SJeremy L Thompson CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 74822eb1385SJeremy L Thompson 749dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 75022eb1385SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_STRIDED) { 75122eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 75222eb1385SJeremy L Thompson if (has_backend_strides) { 75322eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); 75422eb1385SJeremy L Thompson } 75522eb1385SJeremy L Thompson } 75622eb1385SJeremy L Thompson } 7573661185eSjeremylt 75892fe105eSJeremy L Thompson // Offsets data 759fcbe8c06SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 7603661185eSjeremylt const char *resource; 761ad70ee2cSJeremy L Thompson 762ad70ee2cSJeremy L Thompson // Check indices for ref or memcheck backends 76335aed383SJeremy L Thompson { 76435aed383SJeremy L Thompson Ceed current = ceed, parent = NULL; 76535aed383SJeremy L Thompson 76635aed383SJeremy L Thompson CeedCallBackend(CeedGetParent(current, &parent)); 76735aed383SJeremy L Thompson while (current != parent) { 76835aed383SJeremy L Thompson current = parent; 76935aed383SJeremy L Thompson CeedCallBackend(CeedGetParent(current, &parent)); 77035aed383SJeremy L Thompson } 77135aed383SJeremy L Thompson CeedCallBackend(CeedGetResource(parent, &resource)); 77235aed383SJeremy L Thompson } 7732b730f8bSJeremy L Thompson if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 774d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 775e79b91d9SJeremy L Thompson CeedSize l_size; 7763661185eSjeremylt 7771cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 7782b730f8bSJeremy L Thompson for (CeedInt i = 0; i < num_elem * elem_size; i++) { 7796574a04fSJeremy L Thompson CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 7806574a04fSJeremy L Thompson "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 7812b730f8bSJeremy L Thompson } 7822b730f8bSJeremy L Thompson } 7833661185eSjeremylt 78492fe105eSJeremy L Thompson // Copy data 78507d5dec1SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points)); 78607d5dec1SJeremy L Thompson num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size); 787d1d35e2fSjeremylt switch (copy_mode) { 78821617c04Sjeremylt case CEED_COPY_VALUES: 78907d5dec1SJeremy L Thompson CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated)); 79007d5dec1SJeremy L Thompson memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0])); 791d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 79221617c04Sjeremylt break; 79321617c04Sjeremylt case CEED_OWN_POINTER: 794d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 795d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 79621617c04Sjeremylt break; 79721617c04Sjeremylt case CEED_USE_POINTER: 798d979a051Sjeremylt impl->offsets = offsets; 79921617c04Sjeremylt } 800fcbe8c06SSebastian Grimberg 801fcbe8c06SSebastian Grimberg // Orientation data 802fcbe8c06SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 8030305e208SSebastian Grimberg CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 804fcbe8c06SSebastian Grimberg switch (copy_mode) { 805fcbe8c06SSebastian Grimberg case CEED_COPY_VALUES: 80607d5dec1SJeremy L Thompson CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated)); 80707d5dec1SJeremy L Thompson memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0])); 808fcbe8c06SSebastian Grimberg impl->orients = impl->orients_allocated; 809fcbe8c06SSebastian Grimberg break; 810fcbe8c06SSebastian Grimberg case CEED_OWN_POINTER: 811fcbe8c06SSebastian Grimberg impl->orients_allocated = (bool *)orients; 812fcbe8c06SSebastian Grimberg impl->orients = impl->orients_allocated; 813fcbe8c06SSebastian Grimberg break; 814fcbe8c06SSebastian Grimberg case CEED_USE_POINTER: 815fcbe8c06SSebastian Grimberg impl->orients = orients; 816fcbe8c06SSebastian Grimberg } 817fcbe8c06SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 8180305e208SSebastian Grimberg CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 819fcbe8c06SSebastian Grimberg switch (copy_mode) { 820fcbe8c06SSebastian Grimberg case CEED_COPY_VALUES: 82107d5dec1SJeremy L Thompson CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated)); 82207d5dec1SJeremy L Thompson memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0])); 823fcbe8c06SSebastian Grimberg impl->curl_orients = impl->curl_orients_allocated; 824fcbe8c06SSebastian Grimberg break; 825fcbe8c06SSebastian Grimberg case CEED_OWN_POINTER: 8260c73c039SSebastian Grimberg impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 827fcbe8c06SSebastian Grimberg impl->curl_orients = impl->curl_orients_allocated; 828fcbe8c06SSebastian Grimberg break; 829fcbe8c06SSebastian Grimberg case CEED_USE_POINTER: 830fcbe8c06SSebastian Grimberg impl->curl_orients = curl_orients; 831fcbe8c06SSebastian Grimberg } 832fcbe8c06SSebastian Grimberg } 83392fe105eSJeremy L Thompson } 834fe2413ffSjeremylt 835ad70ee2cSJeremy L Thompson // Set apply function based upon num_comp, block_size, and comp_stride 836ad70ee2cSJeremy L Thompson CeedInt index = -1; 837ad70ee2cSJeremy L Thompson 838ad70ee2cSJeremy L Thompson if (block_size < 10) index = 100 * num_comp + 10 * block_size + (comp_stride == 1); 839ad70ee2cSJeremy L Thompson switch (index) { 840d979a051Sjeremylt case 110: 841d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 842d979a051Sjeremylt break; 843d979a051Sjeremylt case 111: 844d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 845d979a051Sjeremylt break; 846d979a051Sjeremylt case 180: 847d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 848d979a051Sjeremylt break; 849d979a051Sjeremylt case 181: 850d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 851d979a051Sjeremylt break; 852d979a051Sjeremylt case 310: 853d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 854d979a051Sjeremylt break; 855d979a051Sjeremylt case 311: 856d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 857d979a051Sjeremylt break; 858d979a051Sjeremylt case 380: 859d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 860d979a051Sjeremylt break; 861d979a051Sjeremylt case 381: 862d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 863d979a051Sjeremylt break; 864bf4d1581Sjeremylt // LCOV_EXCL_START 865d979a051Sjeremylt case 510: 866d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 867d979a051Sjeremylt break; 868bf4d1581Sjeremylt // LCOV_EXCL_STOP 869d979a051Sjeremylt case 511: 870d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 871d979a051Sjeremylt break; 872bf4d1581Sjeremylt // LCOV_EXCL_START 873d979a051Sjeremylt case 580: 874d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 875d979a051Sjeremylt break; 876bf4d1581Sjeremylt // LCOV_EXCL_STOP 877d979a051Sjeremylt case 581: 878d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 879d979a051Sjeremylt break; 880d979a051Sjeremylt default: 881d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 882d979a051Sjeremylt break; 883d979a051Sjeremylt } 884dce49693SSebastian Grimberg 885dce49693SSebastian Grimberg // Register backend functions 886dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Ref)); 887dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 888dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref)); 889dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_POINTS) { 890dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Ref)); 891dce49693SSebastian Grimberg } 892dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 893dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 894dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 895dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 896dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Ref)); 897e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 89821617c04Sjeremylt } 899fc0567d9Srezgarshakeri 900fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 901