13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 321617c04Sjeremylt // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 521617c04Sjeremylt // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 721617c04Sjeremylt 849aac155SJeremy L Thompson #include <ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 103d576824SJeremy L Thompson #include <stdbool.h> 11fcbe8c06SSebastian Grimberg #include <stdlib.h> 123d576824SJeremy L Thompson #include <string.h> 132b730f8bSJeremy L Thompson 1421617c04Sjeremylt #include "ceed-ref.h" 1521617c04Sjeremylt 16f10650afSjeremylt //------------------------------------------------------------------------------ 17f10650afSjeremylt // Core ElemRestriction Apply Code 18f10650afSjeremylt //------------------------------------------------------------------------------ 191cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 2094648b7dSSebastian Grimberg CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 2194648b7dSSebastian Grimberg CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 2294648b7dSSebastian Grimberg // No offsets provided, identity restriction 23d1d35e2fSjeremylt bool has_backend_strides; 24ad70ee2cSJeremy L Thompson 251cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 26d1d35e2fSjeremylt if (has_backend_strides) { 27d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 287f90ec76Sjeremylt // This if branch is left separate to allow better inlining 29ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 302b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 312b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 32ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 33ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 342b730f8bSJeremy L Thompson uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 352b730f8bSJeremy L Thompson } 362b730f8bSJeremy L Thompson } 372b730f8bSJeremy L Thompson } 382b730f8bSJeremy L Thompson } 397f90ec76Sjeremylt } else { 407f90ec76Sjeremylt // User provided strides 417f90ec76Sjeremylt CeedInt strides[3]; 42ad70ee2cSJeremy L Thompson 431cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 44ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 452b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 462b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 47ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 48ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 492b730f8bSJeremy L Thompson uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 502b730f8bSJeremy L Thompson } 512b730f8bSJeremy L Thompson } 522b730f8bSJeremy L Thompson } 532b730f8bSJeremy L Thompson } 547509a596Sjeremylt } 5594648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 5694648b7dSSebastian Grimberg } 5794648b7dSSebastian Grimberg 581cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStandardNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 5994648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 6094648b7dSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 61fcbe8c06SSebastian Grimberg // Default restriction with offsets 6294648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 63ad70ee2cSJeremy L Thompson 641cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 65ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 662b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 67ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 68ad70ee2cSJeremy L Thompson vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 69fcbe8c06SSebastian Grimberg } 70fcbe8c06SSebastian Grimberg } 71fcbe8c06SSebastian Grimberg } 7294648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 7394648b7dSSebastian Grimberg } 7494648b7dSSebastian Grimberg 751cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 7694648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 7794648b7dSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 78fcbe8c06SSebastian Grimberg // Restriction with orientations 7994648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 80ad70ee2cSJeremy L Thompson 811cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 82ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 83fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 84ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 85ad70ee2cSJeremy L Thompson vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = 867c1dbaffSSebastian Grimberg uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 87fcbe8c06SSebastian Grimberg } 88fcbe8c06SSebastian Grimberg } 89fcbe8c06SSebastian Grimberg } 9094648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 9194648b7dSSebastian Grimberg } 9294648b7dSSebastian Grimberg 931cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 9494648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 9594648b7dSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, 9694648b7dSSebastian Grimberg CeedScalar *vv) { 9777d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 9894648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 99ad70ee2cSJeremy L Thompson 1001cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 101ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 102fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1030c73c039SSebastian Grimberg CeedInt n = 0; 104ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 105ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 106ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 107ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 108ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 109ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 1100c73c039SSebastian Grimberg } 1110c73c039SSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 112ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 113ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 114ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 115ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 116ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 117ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 118ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 119ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 1200c73c039SSebastian Grimberg } 1210c73c039SSebastian Grimberg } 122ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 123ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 124ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 125ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 126ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 127ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 1282b730f8bSJeremy L Thompson } 1292b730f8bSJeremy L Thompson } 1302b730f8bSJeremy L Thompson } 1310c73c039SSebastian Grimberg return CEED_ERROR_SUCCESS; 132fcbe8c06SSebastian Grimberg } 1330c73c039SSebastian Grimberg 1341cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 135ad70ee2cSJeremy L Thompson const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 13694648b7dSSebastian Grimberg CeedInt stop, CeedInt num_elem, CeedInt elem_size, 13794648b7dSSebastian Grimberg CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 13894648b7dSSebastian Grimberg // Restriction with (unsigned) tridiagonal transformation 1390c73c039SSebastian Grimberg CeedElemRestriction_Ref *impl; 140ad70ee2cSJeremy L Thompson 1411cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 142ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 1437c1dbaffSSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1447c1dbaffSSebastian Grimberg CeedInt n = 0; 145ad70ee2cSJeremy L Thompson 146ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 147ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 148ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 149ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 150ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 151ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 1527c1dbaffSSebastian Grimberg } 1537c1dbaffSSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 154ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 155ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 156ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 157ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 158ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 159ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 160ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 161ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 1627c1dbaffSSebastian Grimberg } 1637c1dbaffSSebastian Grimberg } 164ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 165ad70ee2cSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 166ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 167ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 168ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 169ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 1707c1dbaffSSebastian Grimberg } 1717c1dbaffSSebastian Grimberg } 1727c1dbaffSSebastian Grimberg } 1737c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 1747c1dbaffSSebastian Grimberg } 1757c1dbaffSSebastian Grimberg 1761cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 17794648b7dSSebastian Grimberg CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 17894648b7dSSebastian Grimberg CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 17994648b7dSSebastian Grimberg // No offsets provided, identity restriction 180d1d35e2fSjeremylt bool has_backend_strides; 181ad70ee2cSJeremy L Thompson 1821cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 183d1d35e2fSjeremylt if (has_backend_strides) { 184d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1857f90ec76Sjeremylt // This if brach is left separate to allow better inlining 186ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 1872b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1882b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 189ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 19058c07c4fSSebastian Grimberg CeedScalar uu_val; 19158c07c4fSSebastian Grimberg 19258c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 19358c07c4fSSebastian Grimberg CeedPragmaAtomic vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu_val; 1942b730f8bSJeremy L Thompson } 1952b730f8bSJeremy L Thompson } 1962b730f8bSJeremy L Thompson } 1972b730f8bSJeremy L Thompson } 1987f90ec76Sjeremylt } else { 1997f90ec76Sjeremylt // User provided strides 2007f90ec76Sjeremylt CeedInt strides[3]; 201ad70ee2cSJeremy L Thompson 2021cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 203ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 2042b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 2052b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 206ad70ee2cSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 20758c07c4fSSebastian Grimberg CeedScalar uu_val; 20858c07c4fSSebastian Grimberg 20958c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 21058c07c4fSSebastian Grimberg CeedPragmaAtomic vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += uu_val; 2112b730f8bSJeremy L Thompson } 2122b730f8bSJeremy L Thompson } 2132b730f8bSJeremy L Thompson } 2142b730f8bSJeremy L Thompson } 215523b8ea0Sjeremylt } 21694648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 21794648b7dSSebastian Grimberg } 21894648b7dSSebastian Grimberg 2191cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStandardTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 22094648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 22194648b7dSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 222fcbe8c06SSebastian Grimberg // Default restriction with offsets 22394648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 224ad70ee2cSJeremy L Thompson 2251cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 226ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 2272b730f8bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 228ad70ee2cSJeremy L Thompson for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 2298d94b059Sjeremylt // Iteration bound set to discard padding elements 230ad70ee2cSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 23158c07c4fSSebastian Grimberg CeedScalar uu_val; 23258c07c4fSSebastian Grimberg 23358c07c4fSSebastian Grimberg uu_val = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset]; 23458c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu_val; 235fcbe8c06SSebastian Grimberg } 236fcbe8c06SSebastian Grimberg } 237fcbe8c06SSebastian Grimberg } 238fcbe8c06SSebastian Grimberg } 23994648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 24094648b7dSSebastian Grimberg } 24194648b7dSSebastian Grimberg 2421cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 24394648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 24494648b7dSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 245fcbe8c06SSebastian Grimberg // Restriction with orientations 24694648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 247ad70ee2cSJeremy L Thompson 2481cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 249ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 250fcbe8c06SSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 251ad70ee2cSJeremy L Thompson for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 252fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 253ad70ee2cSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 25458c07c4fSSebastian Grimberg CeedScalar uu_val; 25558c07c4fSSebastian Grimberg 25658c07c4fSSebastian Grimberg uu_val = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 25758c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu_val; 258fcbe8c06SSebastian Grimberg } 259fcbe8c06SSebastian Grimberg } 260fcbe8c06SSebastian Grimberg } 261fcbe8c06SSebastian Grimberg } 26294648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 26394648b7dSSebastian Grimberg } 26494648b7dSSebastian Grimberg 2651cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 26694648b7dSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 26794648b7dSSebastian Grimberg CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 26877d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 26994648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 270ad70ee2cSJeremy L Thompson 2711cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 272ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 273fcbe8c06SSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 274fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 27558c07c4fSSebastian Grimberg const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 27658c07c4fSSebastian Grimberg CeedInt n = 0; 27758c07c4fSSebastian Grimberg 278ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 27958c07c4fSSebastian Grimberg CeedScalar uu_val; 28058c07c4fSSebastian Grimberg 28158c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 282ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 283ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 284ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 28558c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 2860c73c039SSebastian Grimberg } 2870c73c039SSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 288ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 28958c07c4fSSebastian Grimberg CeedScalar uu_val; 29058c07c4fSSebastian Grimberg 29158c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 292ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 293ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 294ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 295ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 296ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 29758c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 2980c73c039SSebastian Grimberg } 2990c73c039SSebastian Grimberg } 300ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 30158c07c4fSSebastian Grimberg CeedScalar uu_val; 30258c07c4fSSebastian Grimberg 30358c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 304ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 305ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 306ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 30758c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 30821617c04Sjeremylt } 309b435c5a6Srezgarshakeri } 3102b730f8bSJeremy L Thompson } 311e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 31221617c04Sjeremylt } 31321617c04Sjeremylt 3141cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 315ad70ee2cSJeremy L Thompson const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 31694648b7dSSebastian Grimberg CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, 31794648b7dSSebastian Grimberg const CeedScalar *uu, CeedScalar *vv) { 31894648b7dSSebastian Grimberg // Restriction with (unsigned) tridiagonal transformation 3197c1dbaffSSebastian Grimberg CeedElemRestriction_Ref *impl; 320ad70ee2cSJeremy L Thompson 3211cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 322ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 3237c1dbaffSSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 3247c1dbaffSSebastian Grimberg // Iteration bound set to discard padding elements 325ad70ee2cSJeremy L Thompson const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 32658c07c4fSSebastian Grimberg CeedInt n = 0; 327ad70ee2cSJeremy L Thompson 328ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 32958c07c4fSSebastian Grimberg CeedScalar uu_val; 33058c07c4fSSebastian Grimberg 33158c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 332ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 333ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 334ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 33558c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 3367c1dbaffSSebastian Grimberg } 3377c1dbaffSSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 338ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 33958c07c4fSSebastian Grimberg CeedScalar uu_val; 34058c07c4fSSebastian Grimberg 34158c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 342ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 343ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 344ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 345ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 346ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 34758c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 3487c1dbaffSSebastian Grimberg } 3497c1dbaffSSebastian Grimberg } 350ad70ee2cSJeremy L Thompson for (CeedInt j = 0; j < block_end; j++) { 35158c07c4fSSebastian Grimberg CeedScalar uu_val; 35258c07c4fSSebastian Grimberg 35358c07c4fSSebastian Grimberg uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 354ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 355ad70ee2cSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 356ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 35758c07c4fSSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; 3587c1dbaffSSebastian Grimberg } 3597c1dbaffSSebastian Grimberg } 3607c1dbaffSSebastian Grimberg } 3617c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 3627c1dbaffSSebastian Grimberg } 3637c1dbaffSSebastian Grimberg 3641249ccc5SJeremy L Thompson static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, CeedInt stop, 36505fa913cSJeremy L Thompson CeedTransposeMode t_mode, const CeedScalar *uu, CeedScalar *vv) { 36605fa913cSJeremy L Thompson CeedInt num_points, l_vec_offset, e_vec_offset = 0; 36705fa913cSJeremy L Thompson CeedElemRestriction_Ref *impl; 36805fa913cSJeremy L Thompson 36905fa913cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 37005fa913cSJeremy L Thompson 37105fa913cSJeremy L Thompson for (CeedInt e = start; e < stop; e++) { 3720930e4e7SJeremy L Thompson l_vec_offset = impl->offsets[e]; 37305fa913cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points)); 37405fa913cSJeremy L Thompson if (t_mode == CEED_NOTRANSPOSE) { 37505fa913cSJeremy L Thompson for (CeedInt i = 0; i < num_points; i++) { 37605fa913cSJeremy L Thompson for (CeedInt j = 0; j < num_comp; j++) vv[i * num_comp + j + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j]; 37705fa913cSJeremy L Thompson } 37805fa913cSJeremy L Thompson } else { 37905fa913cSJeremy L Thompson for (CeedInt i = 0; i < num_points; i++) { 38005fa913cSJeremy L Thompson for (CeedInt j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] = uu[i * num_comp + j + e_vec_offset]; 38105fa913cSJeremy L Thompson } 38205fa913cSJeremy L Thompson } 38305fa913cSJeremy L Thompson e_vec_offset += num_points * num_comp; 38405fa913cSJeremy L Thompson } 38505fa913cSJeremy L Thompson return CEED_ERROR_SUCCESS; 38605fa913cSJeremy L Thompson } 38705fa913cSJeremy L Thompson 3881cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 389ad70ee2cSJeremy L Thompson const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, 390ad70ee2cSJeremy L Thompson bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) { 391ad70ee2cSJeremy L Thompson CeedInt num_elem, elem_size, v_offset; 392ad70ee2cSJeremy L Thompson CeedRestrictionType rstr_type; 3937c1dbaffSSebastian Grimberg const CeedScalar *uu; 3947c1dbaffSSebastian Grimberg CeedScalar *vv; 395ad70ee2cSJeremy L Thompson 3961cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 3971cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 398ad70ee2cSJeremy L Thompson v_offset = start * block_size * elem_size * num_comp; 3991cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 40094648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 401ad70ee2cSJeremy L Thompson 40294648b7dSSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 40394648b7dSSebastian Grimberg // Sum into for transpose mode, E-vector to L-vector 40494648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 40594648b7dSSebastian Grimberg } else { 40694648b7dSSebastian Grimberg // Overwrite for notranspose mode, L-vector to E-vector 40794648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 40894648b7dSSebastian Grimberg } 40994648b7dSSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 4107c1dbaffSSebastian Grimberg // Restriction from E-vector to L-vector 4117c1dbaffSSebastian Grimberg // Performing v += r^T * u 4127c1dbaffSSebastian Grimberg // uu has shape [elem_size, num_comp, num_elem], row-major 4137c1dbaffSSebastian Grimberg // vv has shape [nnodes, num_comp] 4147c1dbaffSSebastian Grimberg // Sum into for transpose mode 4157c1dbaffSSebastian Grimberg switch (rstr_type) { 4167c1dbaffSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 4175d10938bSJeremy L Thompson CeedCallBackend( 4181cc2c60dSJeremy L Thompson CeedElemRestrictionApplyStridedTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 41994648b7dSSebastian Grimberg break; 42061a27d74SSebastian Grimberg case CEED_RESTRICTION_STANDARD: 4211cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyStandardTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 4225d10938bSJeremy L Thompson v_offset, uu, vv)); 42394648b7dSSebastian Grimberg break; 4247c1dbaffSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 42594648b7dSSebastian Grimberg if (use_signs) { 4261cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4271cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 42894648b7dSSebastian Grimberg } else { 4291cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyStandardTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4301cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 43194648b7dSSebastian Grimberg } 43294648b7dSSebastian Grimberg break; 43394648b7dSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: 43494648b7dSSebastian Grimberg if (use_signs && use_orients) { 4351cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4365d10938bSJeremy L Thompson elem_size, v_offset, uu, vv)); 43794648b7dSSebastian Grimberg } else if (use_orients) { 4381cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 4391cc2c60dSJeremy L Thompson num_elem, elem_size, v_offset, uu, vv)); 44094648b7dSSebastian Grimberg } else { 4411cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyStandardTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4421cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 44394648b7dSSebastian Grimberg } 44494648b7dSSebastian Grimberg break; 4452c7e7413SJeremy L Thompson case CEED_RESTRICTION_POINTS: 4461249ccc5SJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 4472c7e7413SJeremy L Thompson break; 44894648b7dSSebastian Grimberg } 44994648b7dSSebastian Grimberg } else { 45094648b7dSSebastian Grimberg // Restriction from L-vector to E-vector 45194648b7dSSebastian Grimberg // Perform: v = r * u 45294648b7dSSebastian Grimberg // vv has shape [elem_size, num_comp, num_elem], row-major 45394648b7dSSebastian Grimberg // uu has shape [nnodes, num_comp] 45494648b7dSSebastian Grimberg // Overwrite for notranspose mode 45594648b7dSSebastian Grimberg switch (rstr_type) { 45694648b7dSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 4575d10938bSJeremy L Thompson CeedCallBackend( 4581cc2c60dSJeremy L Thompson CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 45994648b7dSSebastian Grimberg break; 46061a27d74SSebastian Grimberg case CEED_RESTRICTION_STANDARD: 4611cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyStandardNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4621cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 46394648b7dSSebastian Grimberg break; 46494648b7dSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 46594648b7dSSebastian Grimberg if (use_signs) { 4661cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4671cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 46894648b7dSSebastian Grimberg } else { 4691cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyStandardNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4701cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 47194648b7dSSebastian Grimberg } 47294648b7dSSebastian Grimberg break; 47394648b7dSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: 47494648b7dSSebastian Grimberg if (use_signs && use_orients) { 4751cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4765d10938bSJeremy L Thompson elem_size, v_offset, uu, vv)); 47794648b7dSSebastian Grimberg } else if (use_orients) { 4781cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 4795d10938bSJeremy L Thompson num_elem, elem_size, v_offset, uu, vv)); 48094648b7dSSebastian Grimberg } else { 4811cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyStandardNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4821cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 48394648b7dSSebastian Grimberg } 48494648b7dSSebastian Grimberg break; 4852c7e7413SJeremy L Thompson case CEED_RESTRICTION_POINTS: 4861249ccc5SJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 4872c7e7413SJeremy L Thompson break; 48894648b7dSSebastian Grimberg } 4897c1dbaffSSebastian Grimberg } 4907c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 4917c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 4927c1dbaffSSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 4937c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 4947c1dbaffSSebastian Grimberg } 4957c1dbaffSSebastian Grimberg 4967c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 497f10650afSjeremylt // ElemRestriction Apply - Common Sizes 498f10650afSjeremylt //------------------------------------------------------------------------------ 4991cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5007c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5017c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5021cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 503d979a051Sjeremylt } 504d979a051Sjeremylt 5051cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5067c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5077c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5081cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5094d2a38eeSjeremylt } 5104d2a38eeSjeremylt 5111cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5127c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5137c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5141cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 5159c36149bSjeremylt } 5169c36149bSjeremylt 5171cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5187c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5197c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5201cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5219c36149bSjeremylt } 5229c36149bSjeremylt 5231cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5247c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5257c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5261cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 527d979a051Sjeremylt } 528d979a051Sjeremylt 5291cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5307c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5317c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5321cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 533d979a051Sjeremylt } 534d979a051Sjeremylt 5351cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5367c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5377c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5381cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 539d979a051Sjeremylt } 540d979a051Sjeremylt 5411cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5427c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5437c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5441cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 545d979a051Sjeremylt } 546d979a051Sjeremylt 547bf4d1581Sjeremylt // LCOV_EXCL_START 5481cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5497c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5507c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5511cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 552d979a051Sjeremylt } 553bf4d1581Sjeremylt // LCOV_EXCL_STOP 554d979a051Sjeremylt 5551cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5567c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5577c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5581cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 559d979a051Sjeremylt } 560d979a051Sjeremylt 561bf4d1581Sjeremylt // LCOV_EXCL_START 5621cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5637c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5647c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5651cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 566d979a051Sjeremylt } 567bf4d1581Sjeremylt // LCOV_EXCL_STOP 568d979a051Sjeremylt 5691cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5707c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5710c73c039SSebastian Grimberg CeedVector v, CeedRequest *request) { 5721cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5734d2a38eeSjeremylt } 5744d2a38eeSjeremylt 575f10650afSjeremylt //------------------------------------------------------------------------------ 576f10650afSjeremylt // ElemRestriction Apply 577f10650afSjeremylt //------------------------------------------------------------------------------ 5781cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 579ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 580ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 581ad70ee2cSJeremy L Thompson 5821cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 5831cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 5841cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 5851cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 5861cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 5871cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request)); 5885d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 589f30b1135SSebastian Grimberg } 590f30b1135SSebastian Grimberg 591f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 592f30b1135SSebastian Grimberg // ElemRestriction Apply Unsigned 593f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 5941cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 5951cc2c60dSJeremy L Thompson CeedRequest *request) { 596ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 597ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 598ad70ee2cSJeremy L Thompson 5991cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 6001cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6011cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6021cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6031cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6041cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request)); 6055d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6067c1dbaffSSebastian Grimberg } 6077c1dbaffSSebastian Grimberg 6087c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 6097c1dbaffSSebastian Grimberg // ElemRestriction Apply Unoriented 6107c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 6111cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 6121cc2c60dSJeremy L Thompson CeedRequest *request) { 613ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 614ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 615ad70ee2cSJeremy L Thompson 6161cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 6171cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6181cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6191cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6201cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6211cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request)); 6225d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6239c36149bSjeremylt } 624be9261b7Sjeremylt 625f10650afSjeremylt //------------------------------------------------------------------------------ 6262c7e7413SJeremy L Thompson // ElemRestriction Apply Points 6272c7e7413SJeremy L Thompson //------------------------------------------------------------------------------ 6281249ccc5SJeremy L Thompson static int CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction r, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 6292c7e7413SJeremy L Thompson CeedRequest *request) { 63005fa913cSJeremy L Thompson CeedInt num_comp; 6312c7e7413SJeremy L Thompson CeedElemRestriction_Ref *impl; 6322c7e7413SJeremy L Thompson 6332c7e7413SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 63405fa913cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 63505fa913cSJeremy L Thompson return impl->Apply(r, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request); 6362c7e7413SJeremy L Thompson } 6372c7e7413SJeremy L Thompson 6382c7e7413SJeremy L Thompson //------------------------------------------------------------------------------ 639f10650afSjeremylt // ElemRestriction Apply Block 640f10650afSjeremylt //------------------------------------------------------------------------------ 6411cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 642074cb416Sjeremylt CeedRequest *request) { 643ad70ee2cSJeremy L Thompson CeedInt block_size, num_comp, comp_stride; 644ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 645ad70ee2cSJeremy L Thompson 6461cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6471cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6481cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6491cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6501cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request)); 6515d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6529c36149bSjeremylt } 653be9261b7Sjeremylt 654f10650afSjeremylt //------------------------------------------------------------------------------ 655bd33150aSjeremylt // ElemRestriction Get Offsets 656bd33150aSjeremylt //------------------------------------------------------------------------------ 6572b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 658bd33150aSjeremylt Ceed ceed; 659ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 660ad70ee2cSJeremy L Thompson 661ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6622b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 663bd33150aSjeremylt 6646574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 665bd33150aSjeremylt 666bd33150aSjeremylt *offsets = impl->offsets; 667e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 668bd33150aSjeremylt } 669bd33150aSjeremylt 670bd33150aSjeremylt //------------------------------------------------------------------------------ 67177d1c127SSebastian Grimberg // ElemRestriction Get Orientations 67277d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 67377d1c127SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 67477d1c127SSebastian Grimberg Ceed ceed; 675ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 676ad70ee2cSJeremy L Thompson 677ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 67877d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 67977d1c127SSebastian Grimberg 680fcbe8c06SSebastian Grimberg CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 68177d1c127SSebastian Grimberg 68277d1c127SSebastian Grimberg *orients = impl->orients; 68377d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 68477d1c127SSebastian Grimberg } 68577d1c127SSebastian Grimberg 68677d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 68777d1c127SSebastian Grimberg // ElemRestriction Get Curl-Conforming Orientations 68877d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 6890c73c039SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 69077d1c127SSebastian Grimberg Ceed ceed; 691ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 692ad70ee2cSJeremy L Thompson 693ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 69477d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 69577d1c127SSebastian Grimberg 696fcbe8c06SSebastian Grimberg CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 69777d1c127SSebastian Grimberg 69877d1c127SSebastian Grimberg *curl_orients = impl->curl_orients; 69977d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 70077d1c127SSebastian Grimberg } 70177d1c127SSebastian Grimberg 70277d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 703f10650afSjeremylt // ElemRestriction Destroy 704f10650afSjeremylt //------------------------------------------------------------------------------ 7051cc2c60dSJeremy L Thompson static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr) { 706fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 70721617c04Sjeremylt 7081cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 7092b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->offsets_allocated)); 71077d1c127SSebastian Grimberg CeedCallBackend(CeedFree(&impl->orients_allocated)); 71177d1c127SSebastian Grimberg CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 7122b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 713e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 71421617c04Sjeremylt } 71521617c04Sjeremylt 716f10650afSjeremylt //------------------------------------------------------------------------------ 717f10650afSjeremylt // ElemRestriction Create 718f10650afSjeremylt //------------------------------------------------------------------------------ 719fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 7201cc2c60dSJeremy L Thompson const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 721ad70ee2cSJeremy L Thompson Ceed ceed; 72207d5dec1SJeremy L Thompson CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets; 723ad70ee2cSJeremy L Thompson CeedRestrictionType rstr_type; 72421617c04Sjeremylt CeedElemRestriction_Ref *impl; 725ad70ee2cSJeremy L Thompson 7261cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 7271cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 7281cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 7291cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 7301cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 7311cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 7321cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 733ad70ee2cSJeremy L Thompson CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 73421617c04Sjeremylt 7356574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 7362b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 7373661185eSjeremylt 73892fe105eSJeremy L Thompson // Offsets data 7391cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 740fcbe8c06SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 7413661185eSjeremylt const char *resource; 742ad70ee2cSJeremy L Thompson 743ad70ee2cSJeremy L Thompson // Check indices for ref or memcheck backends 744*35aed383SJeremy L Thompson { 745*35aed383SJeremy L Thompson Ceed current = ceed, parent = NULL; 746*35aed383SJeremy L Thompson 747*35aed383SJeremy L Thompson CeedCallBackend(CeedGetParent(current, &parent)); 748*35aed383SJeremy L Thompson while (current != parent) { 749*35aed383SJeremy L Thompson current = parent; 750*35aed383SJeremy L Thompson CeedCallBackend(CeedGetParent(current, &parent)); 751*35aed383SJeremy L Thompson } 752*35aed383SJeremy L Thompson CeedCallBackend(CeedGetResource(parent, &resource)); 753*35aed383SJeremy L Thompson } 7542b730f8bSJeremy L Thompson if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 755d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 756e79b91d9SJeremy L Thompson CeedSize l_size; 7573661185eSjeremylt 7581cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 7592b730f8bSJeremy L Thompson for (CeedInt i = 0; i < num_elem * elem_size; i++) { 7606574a04fSJeremy L Thompson CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 7616574a04fSJeremy L Thompson "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 7622b730f8bSJeremy L Thompson } 7632b730f8bSJeremy L Thompson } 7643661185eSjeremylt 76592fe105eSJeremy L Thompson // Copy data 76607d5dec1SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points)); 76707d5dec1SJeremy L Thompson num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size); 768d1d35e2fSjeremylt switch (copy_mode) { 76921617c04Sjeremylt case CEED_COPY_VALUES: 77007d5dec1SJeremy L Thompson CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated)); 77107d5dec1SJeremy L Thompson memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0])); 772d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 77321617c04Sjeremylt break; 77421617c04Sjeremylt case CEED_OWN_POINTER: 775d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 776d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 77721617c04Sjeremylt break; 77821617c04Sjeremylt case CEED_USE_POINTER: 779d979a051Sjeremylt impl->offsets = offsets; 78021617c04Sjeremylt } 781fcbe8c06SSebastian Grimberg 782fcbe8c06SSebastian Grimberg // Orientation data 783fcbe8c06SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 7840305e208SSebastian Grimberg CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 785fcbe8c06SSebastian Grimberg switch (copy_mode) { 786fcbe8c06SSebastian Grimberg case CEED_COPY_VALUES: 78707d5dec1SJeremy L Thompson CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated)); 78807d5dec1SJeremy L Thompson memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0])); 789fcbe8c06SSebastian Grimberg impl->orients = impl->orients_allocated; 790fcbe8c06SSebastian Grimberg break; 791fcbe8c06SSebastian Grimberg case CEED_OWN_POINTER: 792fcbe8c06SSebastian Grimberg impl->orients_allocated = (bool *)orients; 793fcbe8c06SSebastian Grimberg impl->orients = impl->orients_allocated; 794fcbe8c06SSebastian Grimberg break; 795fcbe8c06SSebastian Grimberg case CEED_USE_POINTER: 796fcbe8c06SSebastian Grimberg impl->orients = orients; 797fcbe8c06SSebastian Grimberg } 798fcbe8c06SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 7990305e208SSebastian Grimberg CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 800fcbe8c06SSebastian Grimberg switch (copy_mode) { 801fcbe8c06SSebastian Grimberg case CEED_COPY_VALUES: 80207d5dec1SJeremy L Thompson CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated)); 80307d5dec1SJeremy L Thompson memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0])); 804fcbe8c06SSebastian Grimberg impl->curl_orients = impl->curl_orients_allocated; 805fcbe8c06SSebastian Grimberg break; 806fcbe8c06SSebastian Grimberg case CEED_OWN_POINTER: 8070c73c039SSebastian Grimberg impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 808fcbe8c06SSebastian Grimberg impl->curl_orients = impl->curl_orients_allocated; 809fcbe8c06SSebastian Grimberg break; 810fcbe8c06SSebastian Grimberg case CEED_USE_POINTER: 811fcbe8c06SSebastian Grimberg impl->curl_orients = curl_orients; 812fcbe8c06SSebastian Grimberg } 813fcbe8c06SSebastian Grimberg } 81492fe105eSJeremy L Thompson } 815fe2413ffSjeremylt 8161cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 8171cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 8181cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Ref)); 8192c7e7413SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 8201249ccc5SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Ref)); 8211249ccc5SJeremy L Thompson } 8221cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 8231cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref)); 8241cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 8251cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 8261cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 8272c7e7413SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 8281cc2c60dSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Ref)); 829d979a051Sjeremylt 830ad70ee2cSJeremy L Thompson // Set apply function based upon num_comp, block_size, and comp_stride 831ad70ee2cSJeremy L Thompson CeedInt index = -1; 832ad70ee2cSJeremy L Thompson 833ad70ee2cSJeremy L Thompson if (block_size < 10) index = 100 * num_comp + 10 * block_size + (comp_stride == 1); 834ad70ee2cSJeremy L Thompson switch (index) { 835d979a051Sjeremylt case 110: 836d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 837d979a051Sjeremylt break; 838d979a051Sjeremylt case 111: 839d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 840d979a051Sjeremylt break; 841d979a051Sjeremylt case 180: 842d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 843d979a051Sjeremylt break; 844d979a051Sjeremylt case 181: 845d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 846d979a051Sjeremylt break; 847d979a051Sjeremylt case 310: 848d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 849d979a051Sjeremylt break; 850d979a051Sjeremylt case 311: 851d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 852d979a051Sjeremylt break; 853d979a051Sjeremylt case 380: 854d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 855d979a051Sjeremylt break; 856d979a051Sjeremylt case 381: 857d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 858d979a051Sjeremylt break; 859bf4d1581Sjeremylt // LCOV_EXCL_START 860d979a051Sjeremylt case 510: 861d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 862d979a051Sjeremylt break; 863bf4d1581Sjeremylt // LCOV_EXCL_STOP 864d979a051Sjeremylt case 511: 865d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 866d979a051Sjeremylt break; 867bf4d1581Sjeremylt // LCOV_EXCL_START 868d979a051Sjeremylt case 580: 869d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 870d979a051Sjeremylt break; 871bf4d1581Sjeremylt // LCOV_EXCL_STOP 872d979a051Sjeremylt case 581: 873d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 874d979a051Sjeremylt break; 875d979a051Sjeremylt default: 876d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 877d979a051Sjeremylt break; 878d979a051Sjeremylt } 879e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 88021617c04Sjeremylt } 881fc0567d9Srezgarshakeri 882fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 883