13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 321617c04Sjeremylt // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 521617c04Sjeremylt // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 721617c04Sjeremylt 849aac155SJeremy L Thompson #include <ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 103d576824SJeremy L Thompson #include <stdbool.h> 11fcbe8c06SSebastian Grimberg #include <stdlib.h> 123d576824SJeremy L Thompson #include <string.h> 132b730f8bSJeremy L Thompson 1421617c04Sjeremylt #include "ceed-ref.h" 1521617c04Sjeremylt 16f10650afSjeremylt //------------------------------------------------------------------------------ 17f10650afSjeremylt // Core ElemRestriction Apply Code 18f10650afSjeremylt //------------------------------------------------------------------------------ 191cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 2094648b7dSSebastian Grimberg CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 21*eda0adbcSSebastian Grimberg CeedInt v_offset, const CeedScalar *__restrict__ uu, 22*eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 2394648b7dSSebastian Grimberg // No offsets provided, identity restriction 24d1d35e2fSjeremylt bool has_backend_strides; 25ad70ee2cSJeremy L Thompson 261cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 27d1d35e2fSjeremylt if (has_backend_strides) { 28d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 297f90ec76Sjeremylt // This if branch is left separate to allow better inlining 30ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 312b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 322b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 33ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 34ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 352b730f8bSJeremy L Thompson uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 362b730f8bSJeremy L Thompson } 372b730f8bSJeremy L Thompson } 382b730f8bSJeremy L Thompson } 392b730f8bSJeremy L Thompson } 407f90ec76Sjeremylt } else { 417f90ec76Sjeremylt // User provided strides 427f90ec76Sjeremylt CeedInt strides[3]; 43ad70ee2cSJeremy L Thompson 441cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 45ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 462b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 472b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 48ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 49ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 502b730f8bSJeremy L Thompson uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 512b730f8bSJeremy L Thompson } 522b730f8bSJeremy L Thompson } 532b730f8bSJeremy L Thompson } 542b730f8bSJeremy L Thompson } 557509a596Sjeremylt } 5694648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 5794648b7dSSebastian Grimberg } 5894648b7dSSebastian Grimberg 59*eda0adbcSSebastian Grimberg static inline int CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 6094648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 61*eda0adbcSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 62*eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 63fcbe8c06SSebastian Grimberg // Default restriction with offsets 6494648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 65ad70ee2cSJeremy L Thompson 661cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 67ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 682b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 69ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 70ad70ee2cSJeremy L Thompson vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 71fcbe8c06SSebastian Grimberg } 72fcbe8c06SSebastian Grimberg } 73fcbe8c06SSebastian Grimberg } 7494648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 7594648b7dSSebastian Grimberg } 7694648b7dSSebastian Grimberg 771cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 7894648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 79*eda0adbcSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 80*eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 81fcbe8c06SSebastian Grimberg // Restriction with orientations 8294648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 83ad70ee2cSJeremy L Thompson 841cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 85ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 86fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 87ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 88ad70ee2cSJeremy L Thompson vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = 897c1dbaffSSebastian Grimberg uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 90fcbe8c06SSebastian Grimberg } 91fcbe8c06SSebastian Grimberg } 92fcbe8c06SSebastian Grimberg } 9394648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 9494648b7dSSebastian Grimberg } 9594648b7dSSebastian Grimberg 961cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 9794648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 98*eda0adbcSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 99*eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 10077d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 10194648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 102ad70ee2cSJeremy L Thompson 1031cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 104ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 105fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1060c73c039SSebastian Grimberg CeedInt n = 0; 107ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 108ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 109ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 110ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 111ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 112ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 1130c73c039SSebastian Grimberg } 1140c73c039SSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 115ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 116ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 117ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 118ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 119ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 120ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 121ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 122ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 1230c73c039SSebastian Grimberg } 1240c73c039SSebastian Grimberg } 125ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 126ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 127ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 128ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 129ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 130ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 1312b730f8bSJeremy L Thompson } 1322b730f8bSJeremy L Thompson } 1332b730f8bSJeremy L Thompson } 1340c73c039SSebastian Grimberg return CEED_ERROR_SUCCESS; 135fcbe8c06SSebastian Grimberg } 1360c73c039SSebastian Grimberg 1371cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 138ad70ee2cSJeremy L Thompson const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 13994648b7dSSebastian Grimberg CeedInt stop, CeedInt num_elem, CeedInt elem_size, 140*eda0adbcSSebastian Grimberg CeedInt v_offset, const CeedScalar *__restrict__ uu, 141*eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 14294648b7dSSebastian Grimberg // Restriction with (unsigned) tridiagonal transformation 1430c73c039SSebastian Grimberg CeedElemRestriction_Ref *impl; 144ad70ee2cSJeremy L Thompson 1451cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 146ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 1477c1dbaffSSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1487c1dbaffSSebastian Grimberg CeedInt n = 0; 149ad70ee2cSJeremy L Thompson 150ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 151ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 152ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 153ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 154ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 155ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 1567c1dbaffSSebastian Grimberg } 1577c1dbaffSSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 158ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 159ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 160ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 161ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 162ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 163ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 164ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 165ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 1667c1dbaffSSebastian Grimberg } 1677c1dbaffSSebastian Grimberg } 168ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 169ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 170ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 171ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 172ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 173ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 1747c1dbaffSSebastian Grimberg } 1757c1dbaffSSebastian Grimberg } 1767c1dbaffSSebastian Grimberg } 1777c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 1787c1dbaffSSebastian Grimberg } 1797c1dbaffSSebastian Grimberg 1801cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 18194648b7dSSebastian Grimberg CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 182*eda0adbcSSebastian Grimberg CeedInt v_offset, const CeedScalar *__restrict__ uu, 183*eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 18494648b7dSSebastian Grimberg // No offsets provided, identity restriction 185d1d35e2fSjeremylt bool has_backend_strides; 186ad70ee2cSJeremy L Thompson 1871cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 188d1d35e2fSjeremylt if (has_backend_strides) { 189d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1907f90ec76Sjeremylt // This if brach is left separate to allow better inlining 191ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 1922b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1932b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 194ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 195bf9b6c6bSSebastian Grimberg vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 1962b730f8bSJeremy L Thompson } 1972b730f8bSJeremy L Thompson } 1982b730f8bSJeremy L Thompson } 1992b730f8bSJeremy L Thompson } 2007f90ec76Sjeremylt } else { 2017f90ec76Sjeremylt // User provided strides 2027f90ec76Sjeremylt CeedInt strides[3]; 203ad70ee2cSJeremy L Thompson 2041cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 205ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 2062b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 2072b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 208ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 2096d5e61d4SSebastian Grimberg vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 2106d5e61d4SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 2112b730f8bSJeremy L Thompson } 2122b730f8bSJeremy L Thompson } 2132b730f8bSJeremy L Thompson } 2142b730f8bSJeremy L Thompson } 215523b8ea0Sjeremylt } 21694648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 21794648b7dSSebastian Grimberg } 21894648b7dSSebastian Grimberg 219*eda0adbcSSebastian Grimberg static inline int CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 22094648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 221*eda0adbcSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 222*eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 223fcbe8c06SSebastian Grimberg // Default restriction with offsets 22494648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 225ad70ee2cSJeremy L Thompson 2261cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 227ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 2282b730f8bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 229ad70ee2cSJeremy L Thompson for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 2308d94b059Sjeremylt // Iteration bound set to discard padding elements 231ad70ee2cSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 23258c07c4fSSebastian Grimberg CeedScalar uu_val; 23358c07c4fSSebastian Grimberg 23458c07c4fSSebastian Grimberg uu_val = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset]; 23558c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu_val; 236fcbe8c06SSebastian Grimberg } 237fcbe8c06SSebastian Grimberg } 238fcbe8c06SSebastian Grimberg } 239fcbe8c06SSebastian Grimberg } 24094648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 24194648b7dSSebastian Grimberg } 24294648b7dSSebastian Grimberg 2431cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 24494648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 245*eda0adbcSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 246*eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 247fcbe8c06SSebastian Grimberg // Restriction with orientations 24894648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 249ad70ee2cSJeremy L Thompson 2501cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 251ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 252fcbe8c06SSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 253ad70ee2cSJeremy L Thompson for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 254fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 255ad70ee2cSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 25658c07c4fSSebastian Grimberg CeedScalar uu_val; 25758c07c4fSSebastian Grimberg 25858c07c4fSSebastian Grimberg uu_val = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 25958c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu_val; 260fcbe8c06SSebastian Grimberg } 261fcbe8c06SSebastian Grimberg } 262fcbe8c06SSebastian Grimberg } 263fcbe8c06SSebastian Grimberg } 26494648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 26594648b7dSSebastian Grimberg } 26694648b7dSSebastian Grimberg 2671cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 26894648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 269*eda0adbcSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 270*eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 27177d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 27294648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 273ad70ee2cSJeremy L Thompson 2741cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 275ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 276fcbe8c06SSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 277fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 27858c07c4fSSebastian Grimberg const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 27958c07c4fSSebastian Grimberg CeedInt n = 0; 28058c07c4fSSebastian Grimberg 281ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 28258c07c4fSSebastian Grimberg CeedScalar uu_val; 28358c07c4fSSebastian Grimberg 28458c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 285ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 286ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 287ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 28858c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 2890c73c039SSebastian Grimberg } 2900c73c039SSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 291ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 29258c07c4fSSebastian Grimberg CeedScalar uu_val; 29358c07c4fSSebastian Grimberg 29458c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 295ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 296ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 297ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 298ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 299ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 30058c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 3010c73c039SSebastian Grimberg } 3020c73c039SSebastian Grimberg } 303ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 30458c07c4fSSebastian Grimberg CeedScalar uu_val; 30558c07c4fSSebastian Grimberg 30658c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 307ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 308ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 309ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 31058c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 31121617c04Sjeremylt } 312b435c5a6Srezgarshakeri } 3132b730f8bSJeremy L Thompson } 314e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 31521617c04Sjeremylt } 31621617c04Sjeremylt 3171cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 318ad70ee2cSJeremy L Thompson const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 31994648b7dSSebastian Grimberg CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, 320*eda0adbcSSebastian Grimberg const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 32194648b7dSSebastian Grimberg // Restriction with (unsigned) tridiagonal transformation 3227c1dbaffSSebastian Grimberg CeedElemRestriction_Ref *impl; 323ad70ee2cSJeremy L Thompson 3241cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 325ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 3267c1dbaffSSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 3277c1dbaffSSebastian Grimberg // Iteration bound set to discard padding elements 328ad70ee2cSJeremy L Thompson const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 32958c07c4fSSebastian Grimberg CeedInt n = 0; 330ad70ee2cSJeremy L Thompson 331ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 33258c07c4fSSebastian Grimberg CeedScalar uu_val; 33358c07c4fSSebastian Grimberg 33458c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 335ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 336ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 337ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 33858c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 3397c1dbaffSSebastian Grimberg } 3407c1dbaffSSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 341ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 34258c07c4fSSebastian Grimberg CeedScalar uu_val; 34358c07c4fSSebastian Grimberg 34458c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 345ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 346ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 347ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 348ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 349ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 35058c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 3517c1dbaffSSebastian Grimberg } 3527c1dbaffSSebastian Grimberg } 353ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 35458c07c4fSSebastian Grimberg CeedScalar uu_val; 35558c07c4fSSebastian Grimberg 35658c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 357ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 358ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 359ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 36058c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 3617c1dbaffSSebastian Grimberg } 3627c1dbaffSSebastian Grimberg } 3637c1dbaffSSebastian Grimberg } 3647c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 3657c1dbaffSSebastian Grimberg } 3667c1dbaffSSebastian Grimberg 3671249ccc5SJeremy L Thompson static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, CeedInt stop, 368*eda0adbcSSebastian Grimberg CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu, 369*eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 37005fa913cSJeremy L Thompson CeedInt num_points, l_vec_offset, e_vec_offset = 0; 37105fa913cSJeremy L Thompson CeedElemRestriction_Ref *impl; 37205fa913cSJeremy L Thompson 37305fa913cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 37405fa913cSJeremy L Thompson for (CeedInt e = start; e < stop; e++) { 3750930e4e7SJeremy L Thompson l_vec_offset = impl->offsets[e]; 37605fa913cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points)); 37705fa913cSJeremy L Thompson if (t_mode == CEED_NOTRANSPOSE) { 37805fa913cSJeremy L Thompson for (CeedInt i = 0; i < num_points; i++) { 3799c34f28eSJeremy L Thompson for (CeedInt j = 0; j < num_comp; j++) vv[j * num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j]; 38005fa913cSJeremy L Thompson } 38105fa913cSJeremy L Thompson } else { 38205fa913cSJeremy L Thompson for (CeedInt i = 0; i < num_points; i++) { 3839c34f28eSJeremy L Thompson for (CeedInt j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] = uu[j * num_points + i + e_vec_offset]; 38405fa913cSJeremy L Thompson } 38505fa913cSJeremy L Thompson } 38605fa913cSJeremy L Thompson e_vec_offset += num_points * num_comp; 38705fa913cSJeremy L Thompson } 38805fa913cSJeremy L Thompson return CEED_ERROR_SUCCESS; 38905fa913cSJeremy L Thompson } 39005fa913cSJeremy L Thompson 3911cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 392ad70ee2cSJeremy L Thompson const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, 393ad70ee2cSJeremy L Thompson bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) { 394ad70ee2cSJeremy L Thompson CeedInt num_elem, elem_size, v_offset; 395ad70ee2cSJeremy L Thompson CeedRestrictionType rstr_type; 3967c1dbaffSSebastian Grimberg const CeedScalar *uu; 3977c1dbaffSSebastian Grimberg CeedScalar *vv; 398ad70ee2cSJeremy L Thompson 3991cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 4001cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 401ad70ee2cSJeremy L Thompson v_offset = start * block_size * elem_size * num_comp; 4021cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 40394648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 404ad70ee2cSJeremy L Thompson 40594648b7dSSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 40694648b7dSSebastian Grimberg // Sum into for transpose mode, E-vector to L-vector 40794648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 40894648b7dSSebastian Grimberg } else { 40994648b7dSSebastian Grimberg // Overwrite for notranspose mode, L-vector to E-vector 41094648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 41194648b7dSSebastian Grimberg } 41294648b7dSSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 4137c1dbaffSSebastian Grimberg // Restriction from E-vector to L-vector 4147c1dbaffSSebastian Grimberg // Performing v += r^T * u 4157c1dbaffSSebastian Grimberg // uu has shape [elem_size, num_comp, num_elem], row-major 4167c1dbaffSSebastian Grimberg // vv has shape [nnodes, num_comp] 4177c1dbaffSSebastian Grimberg // Sum into for transpose mode 4187c1dbaffSSebastian Grimberg switch (rstr_type) { 4197c1dbaffSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 4205d10938bSJeremy L Thompson CeedCallBackend( 4211cc2c60dSJeremy L Thompson CeedElemRestrictionApplyStridedTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 42294648b7dSSebastian Grimberg break; 42361a27d74SSebastian Grimberg case CEED_RESTRICTION_STANDARD: 424*eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 4255d10938bSJeremy L Thompson v_offset, uu, vv)); 42694648b7dSSebastian Grimberg break; 4277c1dbaffSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 42894648b7dSSebastian Grimberg if (use_signs) { 4291cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4301cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 43194648b7dSSebastian Grimberg } else { 432*eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 433*eda0adbcSSebastian Grimberg v_offset, uu, vv)); 43494648b7dSSebastian Grimberg } 43594648b7dSSebastian Grimberg break; 43694648b7dSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: 43794648b7dSSebastian Grimberg if (use_signs && use_orients) { 4381cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4395d10938bSJeremy L Thompson elem_size, v_offset, uu, vv)); 44094648b7dSSebastian Grimberg } else if (use_orients) { 4411cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 4421cc2c60dSJeremy L Thompson num_elem, elem_size, v_offset, uu, vv)); 44394648b7dSSebastian Grimberg } else { 444*eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 445*eda0adbcSSebastian Grimberg v_offset, uu, vv)); 44694648b7dSSebastian Grimberg } 44794648b7dSSebastian Grimberg break; 4482c7e7413SJeremy L Thompson case CEED_RESTRICTION_POINTS: 4491249ccc5SJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 4502c7e7413SJeremy L Thompson break; 45194648b7dSSebastian Grimberg } 45294648b7dSSebastian Grimberg } else { 45394648b7dSSebastian Grimberg // Restriction from L-vector to E-vector 45494648b7dSSebastian Grimberg // Perform: v = r * u 45594648b7dSSebastian Grimberg // vv has shape [elem_size, num_comp, num_elem], row-major 45694648b7dSSebastian Grimberg // uu has shape [nnodes, num_comp] 45794648b7dSSebastian Grimberg // Overwrite for notranspose mode 45894648b7dSSebastian Grimberg switch (rstr_type) { 45994648b7dSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 4605d10938bSJeremy L Thompson CeedCallBackend( 4611cc2c60dSJeremy L Thompson CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 46294648b7dSSebastian Grimberg break; 46361a27d74SSebastian Grimberg case CEED_RESTRICTION_STANDARD: 464*eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 465*eda0adbcSSebastian Grimberg v_offset, uu, vv)); 46694648b7dSSebastian Grimberg break; 46794648b7dSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 46894648b7dSSebastian Grimberg if (use_signs) { 4691cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4701cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 47194648b7dSSebastian Grimberg } else { 472*eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4731cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 47494648b7dSSebastian Grimberg } 47594648b7dSSebastian Grimberg break; 47694648b7dSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: 47794648b7dSSebastian Grimberg if (use_signs && use_orients) { 4781cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4795d10938bSJeremy L Thompson elem_size, v_offset, uu, vv)); 48094648b7dSSebastian Grimberg } else if (use_orients) { 4811cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 4825d10938bSJeremy L Thompson num_elem, elem_size, v_offset, uu, vv)); 48394648b7dSSebastian Grimberg } else { 484*eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4851cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 48694648b7dSSebastian Grimberg } 48794648b7dSSebastian Grimberg break; 4882c7e7413SJeremy L Thompson case CEED_RESTRICTION_POINTS: 4891249ccc5SJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 4902c7e7413SJeremy L Thompson break; 49194648b7dSSebastian Grimberg } 4927c1dbaffSSebastian Grimberg } 4937c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 4947c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 4957c1dbaffSSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 4967c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 4977c1dbaffSSebastian Grimberg } 4987c1dbaffSSebastian Grimberg 4997c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 500f10650afSjeremylt // ElemRestriction Apply - Common Sizes 501f10650afSjeremylt //------------------------------------------------------------------------------ 5021cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5037c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5047c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5051cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 506d979a051Sjeremylt } 507d979a051Sjeremylt 5081cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5097c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5107c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5111cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5124d2a38eeSjeremylt } 5134d2a38eeSjeremylt 5141cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5157c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5167c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5171cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 5189c36149bSjeremylt } 5199c36149bSjeremylt 5201cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5217c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5227c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5231cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5249c36149bSjeremylt } 5259c36149bSjeremylt 5261cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5277c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5287c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5291cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 530d979a051Sjeremylt } 531d979a051Sjeremylt 5321cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5337c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5347c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5351cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 536d979a051Sjeremylt } 537d979a051Sjeremylt 5381cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5397c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5407c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5411cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 542d979a051Sjeremylt } 543d979a051Sjeremylt 5441cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5457c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5467c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5471cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 548d979a051Sjeremylt } 549d979a051Sjeremylt 550bf4d1581Sjeremylt // LCOV_EXCL_START 5511cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5527c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5537c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5541cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 555d979a051Sjeremylt } 556bf4d1581Sjeremylt // LCOV_EXCL_STOP 557d979a051Sjeremylt 5581cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5597c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5607c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5611cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 562d979a051Sjeremylt } 563d979a051Sjeremylt 564bf4d1581Sjeremylt // LCOV_EXCL_START 5651cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5667c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5677c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5681cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 569d979a051Sjeremylt } 570bf4d1581Sjeremylt // LCOV_EXCL_STOP 571d979a051Sjeremylt 5721cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5737c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5740c73c039SSebastian Grimberg CeedVector v, CeedRequest *request) { 5751cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5764d2a38eeSjeremylt } 5774d2a38eeSjeremylt 578f10650afSjeremylt //------------------------------------------------------------------------------ 579f10650afSjeremylt // ElemRestriction Apply 580f10650afSjeremylt //------------------------------------------------------------------------------ 5811cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 582ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 583ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 584ad70ee2cSJeremy L Thompson 5851cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 5861cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 5871cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 5881cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 5891cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 5901cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request)); 5915d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 592f30b1135SSebastian Grimberg } 593f30b1135SSebastian Grimberg 594f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 595f30b1135SSebastian Grimberg // ElemRestriction Apply Unsigned 596f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 5971cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 5981cc2c60dSJeremy L Thompson CeedRequest *request) { 599ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 600ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 601ad70ee2cSJeremy L Thompson 6021cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 6031cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6041cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6051cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6061cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6071cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request)); 6085d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6097c1dbaffSSebastian Grimberg } 6107c1dbaffSSebastian Grimberg 6117c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 6127c1dbaffSSebastian Grimberg // ElemRestriction Apply Unoriented 6137c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 6141cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 6151cc2c60dSJeremy L Thompson CeedRequest *request) { 616ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 617ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 618ad70ee2cSJeremy L Thompson 6191cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 6201cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6211cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6221cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6231cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6241cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request)); 6255d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6269c36149bSjeremylt } 627be9261b7Sjeremylt 628f10650afSjeremylt //------------------------------------------------------------------------------ 6292c7e7413SJeremy L Thompson // ElemRestriction Apply Points 6302c7e7413SJeremy L Thompson //------------------------------------------------------------------------------ 631*eda0adbcSSebastian Grimberg static int CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 6322c7e7413SJeremy L Thompson CeedRequest *request) { 63305fa913cSJeremy L Thompson CeedInt num_comp; 6342c7e7413SJeremy L Thompson CeedElemRestriction_Ref *impl; 6352c7e7413SJeremy L Thompson 636*eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 637*eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 638*eda0adbcSSebastian Grimberg return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request); 6392c7e7413SJeremy L Thompson } 6402c7e7413SJeremy L Thompson 6412c7e7413SJeremy L Thompson //------------------------------------------------------------------------------ 642f10650afSjeremylt // ElemRestriction Apply Block 643f10650afSjeremylt //------------------------------------------------------------------------------ 6441cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 645074cb416Sjeremylt CeedRequest *request) { 646ad70ee2cSJeremy L Thompson CeedInt block_size, num_comp, comp_stride; 647ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 648ad70ee2cSJeremy L Thompson 6491cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6501cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6511cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6521cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6531cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request)); 6545d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6559c36149bSjeremylt } 656be9261b7Sjeremylt 657f10650afSjeremylt //------------------------------------------------------------------------------ 658bd33150aSjeremylt // ElemRestriction Get Offsets 659bd33150aSjeremylt //------------------------------------------------------------------------------ 6602b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 661bd33150aSjeremylt Ceed ceed; 662ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 663ad70ee2cSJeremy L Thompson 664ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6652b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 666bd33150aSjeremylt 6676574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 668bd33150aSjeremylt 669bd33150aSjeremylt *offsets = impl->offsets; 670e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 671bd33150aSjeremylt } 672bd33150aSjeremylt 673bd33150aSjeremylt //------------------------------------------------------------------------------ 67477d1c127SSebastian Grimberg // ElemRestriction Get Orientations 67577d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 67677d1c127SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 67777d1c127SSebastian Grimberg Ceed ceed; 678ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 679ad70ee2cSJeremy L Thompson 680ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 68177d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 68277d1c127SSebastian Grimberg 683fcbe8c06SSebastian Grimberg CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 68477d1c127SSebastian Grimberg 68577d1c127SSebastian Grimberg *orients = impl->orients; 68677d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 68777d1c127SSebastian Grimberg } 68877d1c127SSebastian Grimberg 68977d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 69077d1c127SSebastian Grimberg // ElemRestriction Get Curl-Conforming Orientations 69177d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 6920c73c039SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 69377d1c127SSebastian Grimberg Ceed ceed; 694ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 695ad70ee2cSJeremy L Thompson 696ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 69777d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 69877d1c127SSebastian Grimberg 699fcbe8c06SSebastian Grimberg CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 70077d1c127SSebastian Grimberg 70177d1c127SSebastian Grimberg *curl_orients = impl->curl_orients; 70277d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 70377d1c127SSebastian Grimberg } 70477d1c127SSebastian Grimberg 70577d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 706f10650afSjeremylt // ElemRestriction Destroy 707f10650afSjeremylt //------------------------------------------------------------------------------ 7081cc2c60dSJeremy L Thompson static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr) { 709fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 71021617c04Sjeremylt 7111cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 7122b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->offsets_allocated)); 71377d1c127SSebastian Grimberg CeedCallBackend(CeedFree(&impl->orients_allocated)); 71477d1c127SSebastian Grimberg CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 7152b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 716e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 71721617c04Sjeremylt } 71821617c04Sjeremylt 719f10650afSjeremylt //------------------------------------------------------------------------------ 720f10650afSjeremylt // ElemRestriction Create 721f10650afSjeremylt //------------------------------------------------------------------------------ 722fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 7231cc2c60dSJeremy L Thompson const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 724ad70ee2cSJeremy L Thompson Ceed ceed; 72507d5dec1SJeremy L Thompson CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets; 726ad70ee2cSJeremy L Thompson CeedRestrictionType rstr_type; 72721617c04Sjeremylt CeedElemRestriction_Ref *impl; 728ad70ee2cSJeremy L Thompson 7291cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 7301cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 7311cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 7321cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 7331cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 7341cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 7351cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 736ad70ee2cSJeremy L Thompson CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 73721617c04Sjeremylt 7386574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 7392b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 7403661185eSjeremylt 74192fe105eSJeremy L Thompson // Offsets data 7421cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 743fcbe8c06SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 7443661185eSjeremylt const char *resource; 745ad70ee2cSJeremy L Thompson 746ad70ee2cSJeremy L Thompson // Check indices for ref or memcheck backends 74735aed383SJeremy L Thompson { 74835aed383SJeremy L Thompson Ceed current = ceed, parent = NULL; 74935aed383SJeremy L Thompson 75035aed383SJeremy L Thompson CeedCallBackend(CeedGetParent(current, &parent)); 75135aed383SJeremy L Thompson while (current != parent) { 75235aed383SJeremy L Thompson current = parent; 75335aed383SJeremy L Thompson CeedCallBackend(CeedGetParent(current, &parent)); 75435aed383SJeremy L Thompson } 75535aed383SJeremy L Thompson CeedCallBackend(CeedGetResource(parent, &resource)); 75635aed383SJeremy L Thompson } 7572b730f8bSJeremy L Thompson if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 758d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 759e79b91d9SJeremy L Thompson CeedSize l_size; 7603661185eSjeremylt 7611cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 7622b730f8bSJeremy L Thompson for (CeedInt i = 0; i < num_elem * elem_size; i++) { 7636574a04fSJeremy L Thompson CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 7646574a04fSJeremy L Thompson "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 7652b730f8bSJeremy L Thompson } 7662b730f8bSJeremy L Thompson } 7673661185eSjeremylt 76892fe105eSJeremy L Thompson // Copy data 76907d5dec1SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points)); 77007d5dec1SJeremy L Thompson num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size); 771d1d35e2fSjeremylt switch (copy_mode) { 77221617c04Sjeremylt case CEED_COPY_VALUES: 77307d5dec1SJeremy L Thompson CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated)); 77407d5dec1SJeremy L Thompson memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0])); 775d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 77621617c04Sjeremylt break; 77721617c04Sjeremylt case CEED_OWN_POINTER: 778d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 779d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 78021617c04Sjeremylt break; 78121617c04Sjeremylt case CEED_USE_POINTER: 782d979a051Sjeremylt impl->offsets = offsets; 78321617c04Sjeremylt } 784fcbe8c06SSebastian Grimberg 785fcbe8c06SSebastian Grimberg // Orientation data 786fcbe8c06SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 7870305e208SSebastian Grimberg CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 788fcbe8c06SSebastian Grimberg switch (copy_mode) { 789fcbe8c06SSebastian Grimberg case CEED_COPY_VALUES: 79007d5dec1SJeremy L Thompson CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated)); 79107d5dec1SJeremy L Thompson memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0])); 792fcbe8c06SSebastian Grimberg impl->orients = impl->orients_allocated; 793fcbe8c06SSebastian Grimberg break; 794fcbe8c06SSebastian Grimberg case CEED_OWN_POINTER: 795fcbe8c06SSebastian Grimberg impl->orients_allocated = (bool *)orients; 796fcbe8c06SSebastian Grimberg impl->orients = impl->orients_allocated; 797fcbe8c06SSebastian Grimberg break; 798fcbe8c06SSebastian Grimberg case CEED_USE_POINTER: 799fcbe8c06SSebastian Grimberg impl->orients = orients; 800fcbe8c06SSebastian Grimberg } 801fcbe8c06SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 8020305e208SSebastian Grimberg CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 803fcbe8c06SSebastian Grimberg switch (copy_mode) { 804fcbe8c06SSebastian Grimberg case CEED_COPY_VALUES: 80507d5dec1SJeremy L Thompson CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated)); 80607d5dec1SJeremy L Thompson memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0])); 807fcbe8c06SSebastian Grimberg impl->curl_orients = impl->curl_orients_allocated; 808fcbe8c06SSebastian Grimberg break; 809fcbe8c06SSebastian Grimberg case CEED_OWN_POINTER: 8100c73c039SSebastian Grimberg impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 811fcbe8c06SSebastian Grimberg impl->curl_orients = impl->curl_orients_allocated; 812fcbe8c06SSebastian Grimberg break; 813fcbe8c06SSebastian Grimberg case CEED_USE_POINTER: 814fcbe8c06SSebastian Grimberg impl->curl_orients = curl_orients; 815fcbe8c06SSebastian Grimberg } 816fcbe8c06SSebastian Grimberg } 81792fe105eSJeremy L Thompson } 818fe2413ffSjeremylt 8191cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 8201cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 8211cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Ref)); 8222c7e7413SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 8231249ccc5SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Ref)); 8241249ccc5SJeremy L Thompson } 8251cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 8261cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref)); 8271cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 8281cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 8291cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 8302c7e7413SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 8311cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Ref)); 832d979a051Sjeremylt 833ad70ee2cSJeremy L Thompson // Set apply function based upon num_comp, block_size, and comp_stride 834ad70ee2cSJeremy L Thompson CeedInt index = -1; 835ad70ee2cSJeremy L Thompson 836ad70ee2cSJeremy L Thompson if (block_size < 10) index = 100 * num_comp + 10 * block_size + (comp_stride == 1); 837ad70ee2cSJeremy L Thompson switch (index) { 838d979a051Sjeremylt case 110: 839d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 840d979a051Sjeremylt break; 841d979a051Sjeremylt case 111: 842d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 843d979a051Sjeremylt break; 844d979a051Sjeremylt case 180: 845d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 846d979a051Sjeremylt break; 847d979a051Sjeremylt case 181: 848d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 849d979a051Sjeremylt break; 850d979a051Sjeremylt case 310: 851d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 852d979a051Sjeremylt break; 853d979a051Sjeremylt case 311: 854d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 855d979a051Sjeremylt break; 856d979a051Sjeremylt case 380: 857d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 858d979a051Sjeremylt break; 859d979a051Sjeremylt case 381: 860d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 861d979a051Sjeremylt break; 862bf4d1581Sjeremylt // LCOV_EXCL_START 863d979a051Sjeremylt case 510: 864d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 865d979a051Sjeremylt break; 866bf4d1581Sjeremylt // LCOV_EXCL_STOP 867d979a051Sjeremylt case 511: 868d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 869d979a051Sjeremylt break; 870bf4d1581Sjeremylt // LCOV_EXCL_START 871d979a051Sjeremylt case 580: 872d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 873d979a051Sjeremylt break; 874bf4d1581Sjeremylt // LCOV_EXCL_STOP 875d979a051Sjeremylt case 581: 876d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 877d979a051Sjeremylt break; 878d979a051Sjeremylt default: 879d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 880d979a051Sjeremylt break; 881d979a051Sjeremylt } 882e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 88321617c04Sjeremylt } 884fc0567d9Srezgarshakeri 885fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 886