1d275d636SJeremy L Thompson // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 321617c04Sjeremylt // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 521617c04Sjeremylt // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 721617c04Sjeremylt 849aac155SJeremy L Thompson #include <ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 103d576824SJeremy L Thompson #include <stdbool.h> 11fcbe8c06SSebastian Grimberg #include <stdlib.h> 123d576824SJeremy L Thompson #include <string.h> 132b730f8bSJeremy L Thompson 1421617c04Sjeremylt #include "ceed-ref.h" 1521617c04Sjeremylt 16f10650afSjeremylt //------------------------------------------------------------------------------ 17f10650afSjeremylt // Core ElemRestriction Apply Code 18f10650afSjeremylt //------------------------------------------------------------------------------ 191cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 20*4baa7aecSJeremy L Thompson const CeedInt start, const CeedInt stop, const CeedInt num_elem, 21*4baa7aecSJeremy L Thompson const CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 22eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 2394648b7dSSebastian Grimberg // No offsets provided, identity restriction 24d1d35e2fSjeremylt bool has_backend_strides; 25ad70ee2cSJeremy L Thompson 261cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 27d1d35e2fSjeremylt if (has_backend_strides) { 28d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 297f90ec76Sjeremylt // This if branch is left separate to allow better inlining 3033e3c889SJeremy L Thompson for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 31*4baa7aecSJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 32*4baa7aecSJeremy L Thompson for (CeedSize n = 0; n < elem_size; n++) { 3333e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 3433e3c889SJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 3533e3c889SJeremy L Thompson uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * (CeedSize)num_comp]; 362b730f8bSJeremy L Thompson } 372b730f8bSJeremy L Thompson } 382b730f8bSJeremy L Thompson } 392b730f8bSJeremy L Thompson } 407f90ec76Sjeremylt } else { 417f90ec76Sjeremylt // User provided strides 427f90ec76Sjeremylt CeedInt strides[3]; 43ad70ee2cSJeremy L Thompson 4456c48462SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 4533e3c889SJeremy L Thompson for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 46*4baa7aecSJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 47*4baa7aecSJeremy L Thompson for (CeedSize n = 0; n < elem_size; n++) { 4833e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 4933e3c889SJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 5033e3c889SJeremy L Thompson uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * (CeedSize)strides[2]]; 512b730f8bSJeremy L Thompson } 522b730f8bSJeremy L Thompson } 532b730f8bSJeremy L Thompson } 542b730f8bSJeremy L Thompson } 557509a596Sjeremylt } 5694648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 5794648b7dSSebastian Grimberg } 5894648b7dSSebastian Grimberg 59eda0adbcSSebastian Grimberg static inline int CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 60*4baa7aecSJeremy L Thompson const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 61*4baa7aecSJeremy L Thompson const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 62*4baa7aecSJeremy L Thompson const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 63fcbe8c06SSebastian Grimberg // Default restriction with offsets 6494648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 65ad70ee2cSJeremy L Thompson 661cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6733e3c889SJeremy L Thompson for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 68*4baa7aecSJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 6933e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) { 7033e3c889SJeremy L Thompson vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 71fcbe8c06SSebastian Grimberg } 72fcbe8c06SSebastian Grimberg } 73fcbe8c06SSebastian Grimberg } 7494648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 7594648b7dSSebastian Grimberg } 7694648b7dSSebastian Grimberg 771cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 78*4baa7aecSJeremy L Thompson const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 79*4baa7aecSJeremy L Thompson const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 80*4baa7aecSJeremy L Thompson const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 81fcbe8c06SSebastian Grimberg // Restriction with orientations 8294648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 83ad70ee2cSJeremy L Thompson 841cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 8533e3c889SJeremy L Thompson for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 86*4baa7aecSJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 8733e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) { 8833e3c889SJeremy L Thompson vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = 897c1dbaffSSebastian Grimberg uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 90fcbe8c06SSebastian Grimberg } 91fcbe8c06SSebastian Grimberg } 92fcbe8c06SSebastian Grimberg } 9394648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 9494648b7dSSebastian Grimberg } 9594648b7dSSebastian Grimberg 961cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 97*4baa7aecSJeremy L Thompson const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 98*4baa7aecSJeremy L Thompson const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 99*4baa7aecSJeremy L Thompson const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 10077d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 10194648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 102ad70ee2cSJeremy L Thompson 1031cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 10433e3c889SJeremy L Thompson for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 105*4baa7aecSJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 10633e3c889SJeremy L Thompson CeedSize n = 0; 1075c7e0f51SSebastian Grimberg 10833e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 10933e3c889SJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 110ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 111ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 112ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 113ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 1140c73c039SSebastian Grimberg } 115*4baa7aecSJeremy L Thompson for (n = 1; n < elem_size - 1; n++) { 11633e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 11733e3c889SJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 118ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 119ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 120ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 121ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 122ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 123ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 1240c73c039SSebastian Grimberg } 1250c73c039SSebastian Grimberg } 12633e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 12733e3c889SJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 128ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 129ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 130ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 131ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 1322b730f8bSJeremy L Thompson } 1332b730f8bSJeremy L Thompson } 1342b730f8bSJeremy L Thompson } 1350c73c039SSebastian Grimberg return CEED_ERROR_SUCCESS; 136fcbe8c06SSebastian Grimberg } 1370c73c039SSebastian Grimberg 1381cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 139*4baa7aecSJeremy L Thompson const CeedInt block_size, const CeedInt comp_stride, 140*4baa7aecSJeremy L Thompson const CeedInt start, const CeedInt stop, const CeedInt num_elem, 141*4baa7aecSJeremy L Thompson const CeedInt elem_size, const CeedSize v_offset, 142*4baa7aecSJeremy L Thompson const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 14394648b7dSSebastian Grimberg // Restriction with (unsigned) tridiagonal transformation 1440c73c039SSebastian Grimberg CeedElemRestriction_Ref *impl; 145ad70ee2cSJeremy L Thompson 1461cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 14733e3c889SJeremy L Thompson for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 148*4baa7aecSJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 14933e3c889SJeremy L Thompson CeedSize n = 0; 150ad70ee2cSJeremy L Thompson 15133e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 15233e3c889SJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 153ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 154ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 155ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 156ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 1577c1dbaffSSebastian Grimberg } 158*4baa7aecSJeremy L Thompson for (n = 1; n < elem_size - 1; n++) { 15933e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 16033e3c889SJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 161ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 162ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 163ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 164ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 165ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 166ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 1677c1dbaffSSebastian Grimberg } 1687c1dbaffSSebastian Grimberg } 16933e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 17033e3c889SJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 171ad70ee2cSJeremy L Thompson uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 172ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 173ad70ee2cSJeremy L Thompson uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 174ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 1757c1dbaffSSebastian Grimberg } 1767c1dbaffSSebastian Grimberg } 1777c1dbaffSSebastian Grimberg } 1787c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 1797c1dbaffSSebastian Grimberg } 1807c1dbaffSSebastian Grimberg 1811cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 182*4baa7aecSJeremy L Thompson const CeedInt start, const CeedInt stop, const CeedInt num_elem, 183*4baa7aecSJeremy L Thompson const CeedInt elem_size, const CeedSize v_offset, 184*4baa7aecSJeremy L Thompson const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 18594648b7dSSebastian Grimberg // No offsets provided, identity restriction 186d1d35e2fSjeremylt bool has_backend_strides; 187ad70ee2cSJeremy L Thompson 1881cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 189d1d35e2fSjeremylt if (has_backend_strides) { 190d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1917f90ec76Sjeremylt // This if brach is left separate to allow better inlining 19233e3c889SJeremy L Thompson for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 193*4baa7aecSJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 194*4baa7aecSJeremy L Thompson for (CeedSize n = 0; n < elem_size; n++) { 19533e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 19633e3c889SJeremy L Thompson vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 1972b730f8bSJeremy L Thompson } 1982b730f8bSJeremy L Thompson } 1992b730f8bSJeremy L Thompson } 2002b730f8bSJeremy L Thompson } 2017f90ec76Sjeremylt } else { 2027f90ec76Sjeremylt // User provided strides 2037f90ec76Sjeremylt CeedInt strides[3]; 204ad70ee2cSJeremy L Thompson 20556c48462SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 206ad70ee2cSJeremy L Thompson for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 207*4baa7aecSJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 208*4baa7aecSJeremy L Thompson for (CeedSize n = 0; n < elem_size; n++) { 20933e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 21033e3c889SJeremy L Thompson vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 21133e3c889SJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 2122b730f8bSJeremy L Thompson } 2132b730f8bSJeremy L Thompson } 2142b730f8bSJeremy L Thompson } 2152b730f8bSJeremy L Thompson } 216523b8ea0Sjeremylt } 21794648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 21894648b7dSSebastian Grimberg } 21994648b7dSSebastian Grimberg 220eda0adbcSSebastian Grimberg static inline int CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 221*4baa7aecSJeremy L Thompson const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 222*4baa7aecSJeremy L Thompson const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 223*4baa7aecSJeremy L Thompson const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 224fcbe8c06SSebastian Grimberg // Default restriction with offsets 22594648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 226ad70ee2cSJeremy L Thompson 2271cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 22833e3c889SJeremy L Thompson for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 22933e3c889SJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 23033e3c889SJeremy L Thompson for (CeedSize i = 0; i < elem_size * block_size; i += block_size) { 2318d94b059Sjeremylt // Iteration bound set to discard padding elements 23233e3c889SJeremy L Thompson for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 2335c7e0f51SSebastian Grimberg CeedScalar vv_loc; 23458c07c4fSSebastian Grimberg 23533e3c889SJeremy L Thompson vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset]; 2365c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 237fcbe8c06SSebastian Grimberg } 238fcbe8c06SSebastian Grimberg } 239fcbe8c06SSebastian Grimberg } 240fcbe8c06SSebastian Grimberg } 24194648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 24294648b7dSSebastian Grimberg } 24394648b7dSSebastian Grimberg 2441cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 245*4baa7aecSJeremy L Thompson const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 246*4baa7aecSJeremy L Thompson const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 247*4baa7aecSJeremy L Thompson const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 248fcbe8c06SSebastian Grimberg // Restriction with orientations 24994648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 250ad70ee2cSJeremy L Thompson 2511cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 25233e3c889SJeremy L Thompson for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 25333e3c889SJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 25433e3c889SJeremy L Thompson for (CeedSize i = 0; i < elem_size * block_size; i += block_size) { 255fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 25633e3c889SJeremy L Thompson for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 2575c7e0f51SSebastian Grimberg CeedScalar vv_loc; 25858c07c4fSSebastian Grimberg 25933e3c889SJeremy L Thompson vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 2605c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 261fcbe8c06SSebastian Grimberg } 262fcbe8c06SSebastian Grimberg } 263fcbe8c06SSebastian Grimberg } 264fcbe8c06SSebastian Grimberg } 26594648b7dSSebastian Grimberg return CEED_ERROR_SUCCESS; 26694648b7dSSebastian Grimberg } 26794648b7dSSebastian Grimberg 2681cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 269*4baa7aecSJeremy L Thompson const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 270*4baa7aecSJeremy L Thompson const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 271*4baa7aecSJeremy L Thompson const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 27277d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 27394648b7dSSebastian Grimberg CeedElemRestriction_Ref *impl; 2745c7e0f51SSebastian Grimberg CeedScalar vv_loc[block_size]; 275ad70ee2cSJeremy L Thompson 2761cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 27733e3c889SJeremy L Thompson for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 27833e3c889SJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 279fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 28033e3c889SJeremy L Thompson const CeedSize block_end = CeedIntMin(block_size, num_elem - e); 28133e3c889SJeremy L Thompson CeedSize n = 0; 28258c07c4fSSebastian Grimberg 28333e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) { 28433e3c889SJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 285ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 28633e3c889SJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 287ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 2885c7e0f51SSebastian Grimberg } 28933e3c889SJeremy L Thompson for (CeedSize j = 0; j < block_end; j++) { 2905c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 2910c73c039SSebastian Grimberg } 2920c73c039SSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 2935c7e0f51SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 29433e3c889SJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 295ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 29633e3c889SJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 297ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 29833e3c889SJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 299ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 3000c73c039SSebastian Grimberg } 30133e3c889SJeremy L Thompson for (CeedSize j = 0; j < block_end; j++) { 3025c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 3035c7e0f51SSebastian Grimberg } 3045c7e0f51SSebastian Grimberg } 30533e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) { 30633e3c889SJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 307ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 30833e3c889SJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 309ad70ee2cSJeremy L Thompson impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 3105c7e0f51SSebastian Grimberg } 31133e3c889SJeremy L Thompson for (CeedSize j = 0; j < block_end; j++) { 3125c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 31321617c04Sjeremylt } 314b435c5a6Srezgarshakeri } 3152b730f8bSJeremy L Thompson } 316e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 31721617c04Sjeremylt } 31821617c04Sjeremylt 3191cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 320*4baa7aecSJeremy L Thompson const CeedInt block_size, const CeedInt comp_stride, 321*4baa7aecSJeremy L Thompson const CeedInt start, const CeedInt stop, const CeedInt num_elem, 322*4baa7aecSJeremy L Thompson const CeedInt elem_size, const CeedSize v_offset, 323eda0adbcSSebastian Grimberg const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 32494648b7dSSebastian Grimberg // Restriction with (unsigned) tridiagonal transformation 3257c1dbaffSSebastian Grimberg CeedElemRestriction_Ref *impl; 3265c7e0f51SSebastian Grimberg CeedScalar vv_loc[block_size]; 327ad70ee2cSJeremy L Thompson 3281cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 32933e3c889SJeremy L Thompson for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 33033e3c889SJeremy L Thompson for (CeedSize k = 0; k < num_comp; k++) { 3317c1dbaffSSebastian Grimberg // Iteration bound set to discard padding elements 33233e3c889SJeremy L Thompson const CeedSize block_end = CeedIntMin(block_size, num_elem - e); 33333e3c889SJeremy L Thompson CeedSize n = 0; 334ad70ee2cSJeremy L Thompson 33533e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) { 33633e3c889SJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 337ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 33833e3c889SJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 339ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 3405c7e0f51SSebastian Grimberg } 34133e3c889SJeremy L Thompson for (CeedSize j = 0; j < block_end; j++) { 3425c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 3437c1dbaffSSebastian Grimberg } 3447c1dbaffSSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 34533e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) { 34633e3c889SJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 347ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 34833e3c889SJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 349ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 35033e3c889SJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 351ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 3527c1dbaffSSebastian Grimberg } 35333e3c889SJeremy L Thompson for (CeedSize j = 0; j < block_end; j++) { 3545c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 3555c7e0f51SSebastian Grimberg } 3565c7e0f51SSebastian Grimberg } 35733e3c889SJeremy L Thompson CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) { 35833e3c889SJeremy L Thompson vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 359ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 36033e3c889SJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 361ad70ee2cSJeremy L Thompson abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 3625c7e0f51SSebastian Grimberg } 36333e3c889SJeremy L Thompson for (CeedSize j = 0; j < block_end; j++) { 3645c7e0f51SSebastian Grimberg CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 3657c1dbaffSSebastian Grimberg } 3667c1dbaffSSebastian Grimberg } 3677c1dbaffSSebastian Grimberg } 3687c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 3697c1dbaffSSebastian Grimberg } 3707c1dbaffSSebastian Grimberg 371*4baa7aecSJeremy L Thompson static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt start, 372*4baa7aecSJeremy L Thompson const CeedInt stop, CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu, 373eda0adbcSSebastian Grimberg CeedScalar *__restrict__ vv) { 374171d97d0SJeremy L Thompson CeedInt num_points, l_vec_offset; 375171d97d0SJeremy L Thompson CeedSize e_vec_offset = 0; 37605fa913cSJeremy L Thompson CeedElemRestriction_Ref *impl; 37705fa913cSJeremy L Thompson 37805fa913cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 37905fa913cSJeremy L Thompson for (CeedInt e = start; e < stop; e++) { 3800930e4e7SJeremy L Thompson l_vec_offset = impl->offsets[e]; 38105fa913cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points)); 38205fa913cSJeremy L Thompson if (t_mode == CEED_NOTRANSPOSE) { 38333e3c889SJeremy L Thompson for (CeedSize i = 0; i < num_points; i++) { 38433e3c889SJeremy L Thompson for (CeedSize j = 0; j < num_comp; j++) vv[j * num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j]; 38505fa913cSJeremy L Thompson } 38605fa913cSJeremy L Thompson } else { 38733e3c889SJeremy L Thompson for (CeedSize i = 0; i < num_points; i++) { 3880b63de31SJeremy L Thompson for (CeedSize j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] += uu[j * num_points + i + e_vec_offset]; 38905fa913cSJeremy L Thompson } 39005fa913cSJeremy L Thompson } 391171d97d0SJeremy L Thompson e_vec_offset += num_points * (CeedSize)num_comp; 39205fa913cSJeremy L Thompson } 39305fa913cSJeremy L Thompson return CEED_ERROR_SUCCESS; 39405fa913cSJeremy L Thompson } 39505fa913cSJeremy L Thompson 3961cc2c60dSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 397*4baa7aecSJeremy L Thompson const CeedInt comp_stride, const CeedInt start, const CeedInt stop, CeedTransposeMode t_mode, 398*4baa7aecSJeremy L Thompson bool use_signs, bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) { 399171d97d0SJeremy L Thompson CeedInt num_elem, elem_size; 400171d97d0SJeremy L Thompson CeedSize v_offset = 0; 401ad70ee2cSJeremy L Thompson CeedRestrictionType rstr_type; 4027c1dbaffSSebastian Grimberg const CeedScalar *uu; 4037c1dbaffSSebastian Grimberg CeedScalar *vv; 404ad70ee2cSJeremy L Thompson 4051cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 4061cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 407171d97d0SJeremy L Thompson v_offset = start * block_size * elem_size * (CeedSize)num_comp; 4081cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 40994648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 410ad70ee2cSJeremy L Thompson 41194648b7dSSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 41294648b7dSSebastian Grimberg // Sum into for transpose mode, E-vector to L-vector 41394648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 41494648b7dSSebastian Grimberg } else { 41594648b7dSSebastian Grimberg // Overwrite for notranspose mode, L-vector to E-vector 41694648b7dSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 41794648b7dSSebastian Grimberg } 41894648b7dSSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 4197c1dbaffSSebastian Grimberg // Restriction from E-vector to L-vector 4207c1dbaffSSebastian Grimberg // Performing v += r^T * u 4217c1dbaffSSebastian Grimberg // uu has shape [elem_size, num_comp, num_elem], row-major 4227c1dbaffSSebastian Grimberg // vv has shape [nnodes, num_comp] 4237c1dbaffSSebastian Grimberg // Sum into for transpose mode 4247c1dbaffSSebastian Grimberg switch (rstr_type) { 4257c1dbaffSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 4265d10938bSJeremy L Thompson CeedCallBackend( 4271cc2c60dSJeremy L Thompson CeedElemRestrictionApplyStridedTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 42894648b7dSSebastian Grimberg break; 42961a27d74SSebastian Grimberg case CEED_RESTRICTION_STANDARD: 430eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 4315d10938bSJeremy L Thompson v_offset, uu, vv)); 43294648b7dSSebastian Grimberg break; 4337c1dbaffSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 43494648b7dSSebastian Grimberg if (use_signs) { 4351cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4361cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 43794648b7dSSebastian Grimberg } else { 438eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 439eda0adbcSSebastian Grimberg v_offset, uu, vv)); 44094648b7dSSebastian Grimberg } 44194648b7dSSebastian Grimberg break; 44294648b7dSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: 44394648b7dSSebastian Grimberg if (use_signs && use_orients) { 4441cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4455d10938bSJeremy L Thompson elem_size, v_offset, uu, vv)); 44694648b7dSSebastian Grimberg } else if (use_orients) { 4471cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 4481cc2c60dSJeremy L Thompson num_elem, elem_size, v_offset, uu, vv)); 44994648b7dSSebastian Grimberg } else { 450eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 451eda0adbcSSebastian Grimberg v_offset, uu, vv)); 45294648b7dSSebastian Grimberg } 45394648b7dSSebastian Grimberg break; 4542c7e7413SJeremy L Thompson case CEED_RESTRICTION_POINTS: 4551249ccc5SJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 4562c7e7413SJeremy L Thompson break; 45794648b7dSSebastian Grimberg } 45894648b7dSSebastian Grimberg } else { 45994648b7dSSebastian Grimberg // Restriction from L-vector to E-vector 46094648b7dSSebastian Grimberg // Perform: v = r * u 46194648b7dSSebastian Grimberg // vv has shape [elem_size, num_comp, num_elem], row-major 46294648b7dSSebastian Grimberg // uu has shape [nnodes, num_comp] 46394648b7dSSebastian Grimberg // Overwrite for notranspose mode 46494648b7dSSebastian Grimberg switch (rstr_type) { 46594648b7dSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 4665d10938bSJeremy L Thompson CeedCallBackend( 4671cc2c60dSJeremy L Thompson CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 46894648b7dSSebastian Grimberg break; 46961a27d74SSebastian Grimberg case CEED_RESTRICTION_STANDARD: 470eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 471eda0adbcSSebastian Grimberg v_offset, uu, vv)); 47294648b7dSSebastian Grimberg break; 47394648b7dSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 47494648b7dSSebastian Grimberg if (use_signs) { 4751cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4761cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 47794648b7dSSebastian Grimberg } else { 478eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4791cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 48094648b7dSSebastian Grimberg } 48194648b7dSSebastian Grimberg break; 48294648b7dSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: 48394648b7dSSebastian Grimberg if (use_signs && use_orients) { 4841cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4855d10938bSJeremy L Thompson elem_size, v_offset, uu, vv)); 48694648b7dSSebastian Grimberg } else if (use_orients) { 4871cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 4885d10938bSJeremy L Thompson num_elem, elem_size, v_offset, uu, vv)); 48994648b7dSSebastian Grimberg } else { 490eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 4911cc2c60dSJeremy L Thompson elem_size, v_offset, uu, vv)); 49294648b7dSSebastian Grimberg } 49394648b7dSSebastian Grimberg break; 4942c7e7413SJeremy L Thompson case CEED_RESTRICTION_POINTS: 4951249ccc5SJeremy L Thompson CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 4962c7e7413SJeremy L Thompson break; 49794648b7dSSebastian Grimberg } 4987c1dbaffSSebastian Grimberg } 4997c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 5007c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 5017c1dbaffSSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 5027c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 5037c1dbaffSSebastian Grimberg } 5047c1dbaffSSebastian Grimberg 5057c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 506f10650afSjeremylt // ElemRestriction Apply - Common Sizes 507f10650afSjeremylt //------------------------------------------------------------------------------ 5081cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5097c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5107c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5111cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 512d979a051Sjeremylt } 513d979a051Sjeremylt 5141cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5157c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5167c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5171cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5184d2a38eeSjeremylt } 5194d2a38eeSjeremylt 5201cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5217c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5227c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5231cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 5249c36149bSjeremylt } 5259c36149bSjeremylt 5261cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5277c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5287c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5291cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5309c36149bSjeremylt } 5319c36149bSjeremylt 5321cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5337c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5347c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5351cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 536d979a051Sjeremylt } 537d979a051Sjeremylt 5381cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5397c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5407c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5411cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 542d979a051Sjeremylt } 543d979a051Sjeremylt 5441cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5457c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5467c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5471cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 548d979a051Sjeremylt } 549d979a051Sjeremylt 5501cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5517c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5527c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5531cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 554d979a051Sjeremylt } 555d979a051Sjeremylt 556bf4d1581Sjeremylt // LCOV_EXCL_START 557cf415216SJeremy L Thompson static int CeedElemRestrictionApply_Ref_410(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 558cf415216SJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 559cf415216SJeremy L Thompson CeedVector v, CeedRequest *request) { 560cf415216SJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 4, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 561cf415216SJeremy L Thompson } 562cf415216SJeremy L Thompson 563cf415216SJeremy L Thompson static int CeedElemRestrictionApply_Ref_411(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 564cf415216SJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 565cf415216SJeremy L Thompson CeedVector v, CeedRequest *request) { 566cf415216SJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 4, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 567cf415216SJeremy L Thompson } 568cf415216SJeremy L Thompson 569cf415216SJeremy L Thompson static int CeedElemRestrictionApply_Ref_480(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 570cf415216SJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 571cf415216SJeremy L Thompson CeedVector v, CeedRequest *request) { 572cf415216SJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 4, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 573cf415216SJeremy L Thompson } 574cf415216SJeremy L Thompson 575cf415216SJeremy L Thompson static int CeedElemRestrictionApply_Ref_481(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 576cf415216SJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 577cf415216SJeremy L Thompson CeedVector v, CeedRequest *request) { 578cf415216SJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 4, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 579cf415216SJeremy L Thompson } 580cf415216SJeremy L Thompson 5811cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5827c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5837c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5841cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 585d979a051Sjeremylt } 586bf4d1581Sjeremylt // LCOV_EXCL_STOP 587d979a051Sjeremylt 5881cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5897c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5907c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5911cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 592d979a051Sjeremylt } 593d979a051Sjeremylt 594bf4d1581Sjeremylt // LCOV_EXCL_START 5951cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 5967c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 5977c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 5981cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 599d979a051Sjeremylt } 600bf4d1581Sjeremylt // LCOV_EXCL_STOP 601d979a051Sjeremylt 6021cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 6037c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 6040c73c039SSebastian Grimberg CeedVector v, CeedRequest *request) { 6051cc2c60dSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 6064d2a38eeSjeremylt } 6074d2a38eeSjeremylt 608f10650afSjeremylt //------------------------------------------------------------------------------ 609f10650afSjeremylt // ElemRestriction Apply 610f10650afSjeremylt //------------------------------------------------------------------------------ 6111cc2c60dSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 612ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 613ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 614ad70ee2cSJeremy L Thompson 6151cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 6161cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6171cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6181cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6191cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6201cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request)); 6215d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 622f30b1135SSebastian Grimberg } 623f30b1135SSebastian Grimberg 624f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 625f30b1135SSebastian Grimberg // ElemRestriction Apply Unsigned 626f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 6271cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 6281cc2c60dSJeremy L Thompson CeedRequest *request) { 629ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 630ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 631ad70ee2cSJeremy L Thompson 6321cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 6331cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6341cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6351cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6361cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6371cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request)); 6385d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6397c1dbaffSSebastian Grimberg } 6407c1dbaffSSebastian Grimberg 6417c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 6427c1dbaffSSebastian Grimberg // ElemRestriction Apply Unoriented 6437c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 6441cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 6451cc2c60dSJeremy L Thompson CeedRequest *request) { 646ad70ee2cSJeremy L Thompson CeedInt num_block, block_size, num_comp, comp_stride; 647ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 648ad70ee2cSJeremy L Thompson 6491cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 6501cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6511cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6521cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6531cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6541cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request)); 6555d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6569c36149bSjeremylt } 657be9261b7Sjeremylt 658f10650afSjeremylt //------------------------------------------------------------------------------ 6592c7e7413SJeremy L Thompson // ElemRestriction Apply Points 6602c7e7413SJeremy L Thompson //------------------------------------------------------------------------------ 661eda0adbcSSebastian Grimberg static int CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 6622c7e7413SJeremy L Thompson CeedRequest *request) { 66305fa913cSJeremy L Thompson CeedInt num_comp; 6642c7e7413SJeremy L Thompson CeedElemRestriction_Ref *impl; 6652c7e7413SJeremy L Thompson 666eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 667eda0adbcSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 668eda0adbcSSebastian Grimberg return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request); 6692c7e7413SJeremy L Thompson } 6702c7e7413SJeremy L Thompson 6712c7e7413SJeremy L Thompson //------------------------------------------------------------------------------ 672f10650afSjeremylt // ElemRestriction Apply Block 673f10650afSjeremylt //------------------------------------------------------------------------------ 6741cc2c60dSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 675074cb416Sjeremylt CeedRequest *request) { 676ad70ee2cSJeremy L Thompson CeedInt block_size, num_comp, comp_stride; 677ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 678ad70ee2cSJeremy L Thompson 6791cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 6801cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 6811cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 6821cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 6831cc2c60dSJeremy L Thompson CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request)); 6845d10938bSJeremy L Thompson return CEED_ERROR_SUCCESS; 6859c36149bSjeremylt } 686be9261b7Sjeremylt 687f10650afSjeremylt //------------------------------------------------------------------------------ 688bd33150aSjeremylt // ElemRestriction Get Offsets 689bd33150aSjeremylt //------------------------------------------------------------------------------ 6902b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 691ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 692ad70ee2cSJeremy L Thompson 693ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 694bd33150aSjeremylt 6956e536b99SJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 696bd33150aSjeremylt 697bd33150aSjeremylt *offsets = impl->offsets; 698e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 699bd33150aSjeremylt } 700bd33150aSjeremylt 701bd33150aSjeremylt //------------------------------------------------------------------------------ 70277d1c127SSebastian Grimberg // ElemRestriction Get Orientations 70377d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 70477d1c127SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 705ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 706ad70ee2cSJeremy L Thompson 707ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 70877d1c127SSebastian Grimberg 7096e536b99SJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 71077d1c127SSebastian Grimberg 71177d1c127SSebastian Grimberg *orients = impl->orients; 71277d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 71377d1c127SSebastian Grimberg } 71477d1c127SSebastian Grimberg 71577d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 71677d1c127SSebastian Grimberg // ElemRestriction Get Curl-Conforming Orientations 71777d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 7180c73c039SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 719ad70ee2cSJeremy L Thompson CeedElemRestriction_Ref *impl; 720ad70ee2cSJeremy L Thompson 721ad70ee2cSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 72277d1c127SSebastian Grimberg 7236e536b99SJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 72477d1c127SSebastian Grimberg 72577d1c127SSebastian Grimberg *curl_orients = impl->curl_orients; 72677d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 72777d1c127SSebastian Grimberg } 72877d1c127SSebastian Grimberg 72977d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 730f10650afSjeremylt // ElemRestriction Destroy 731f10650afSjeremylt //------------------------------------------------------------------------------ 7321cc2c60dSJeremy L Thompson static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr) { 733fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 73421617c04Sjeremylt 7351cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 736a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->offsets_owned)); 737a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->orients_owned)); 738a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->curl_orients_owned)); 7392b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 740e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 74121617c04Sjeremylt } 74221617c04Sjeremylt 743f10650afSjeremylt //------------------------------------------------------------------------------ 744f10650afSjeremylt // ElemRestriction Create 745f10650afSjeremylt //------------------------------------------------------------------------------ 746fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 7471cc2c60dSJeremy L Thompson const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 748ad70ee2cSJeremy L Thompson Ceed ceed; 74907d5dec1SJeremy L Thompson CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets; 750ad70ee2cSJeremy L Thompson CeedRestrictionType rstr_type; 75121617c04Sjeremylt CeedElemRestriction_Ref *impl; 752ad70ee2cSJeremy L Thompson 7531cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 7541cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 7551cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 7561cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 7571cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 7581cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 7591cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 76022eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 76121617c04Sjeremylt 7626574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 763dce49693SSebastian Grimberg 7642b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 765dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 76622eb1385SJeremy L Thompson 76722eb1385SJeremy L Thompson // Set layouts 76822eb1385SJeremy L Thompson { 76922eb1385SJeremy L Thompson bool has_backend_strides; 77022eb1385SJeremy L Thompson CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 77122eb1385SJeremy L Thompson 772dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 77322eb1385SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_STRIDED) { 77422eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 77522eb1385SJeremy L Thompson if (has_backend_strides) { 77622eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); 77722eb1385SJeremy L Thompson } 77822eb1385SJeremy L Thompson } 77922eb1385SJeremy L Thompson } 7803661185eSjeremylt 781ff1bc20eSJeremy L Thompson // Expand E-vector size for AtPoints 782ff1bc20eSJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 783ff1bc20eSJeremy L Thompson CeedSize max_points = 0, num_points_total = 0; 784ff1bc20eSJeremy L Thompson 785ff1bc20eSJeremy L Thompson for (CeedInt i = 0; i < num_elem; i++) { 786ff1bc20eSJeremy L Thompson CeedInt num_points = offsets[i + 1] - offsets[i]; 787ff1bc20eSJeremy L Thompson 788ff1bc20eSJeremy L Thompson max_points = CeedIntMax(max_points, num_points); 789ff1bc20eSJeremy L Thompson num_points_total += num_points; 790ff1bc20eSJeremy L Thompson } 791ff1bc20eSJeremy L Thompson // -- Increase size for last element 792ff1bc20eSJeremy L Thompson num_points_total += (max_points - (offsets[num_elem] - offsets[num_elem - 1])); 793ff1bc20eSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetAtPointsEVectorSize(rstr, num_points_total * num_comp)); 794ff1bc20eSJeremy L Thompson } 795ff1bc20eSJeremy L Thompson 79692fe105eSJeremy L Thompson // Offsets data 797fcbe8c06SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 7983661185eSjeremylt const char *resource; 799ad70ee2cSJeremy L Thompson 800ad70ee2cSJeremy L Thompson // Check indices for ref or memcheck backends 80135aed383SJeremy L Thompson { 8029bc66399SJeremy L Thompson Ceed current = ceed, ceed_parent = NULL; 80335aed383SJeremy L Thompson 8049bc66399SJeremy L Thompson CeedCallBackend(CeedGetParent(current, &ceed_parent)); 8059bc66399SJeremy L Thompson CeedCallBackend(CeedGetResource(ceed_parent, &resource)); 8069bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed_parent)); 80735aed383SJeremy L Thompson } 8081ba74105SJeremy L Thompson if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked")) { 809e79b91d9SJeremy L Thompson CeedSize l_size; 8103661185eSjeremylt 8111cc2c60dSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 8122b730f8bSJeremy L Thompson for (CeedInt i = 0; i < num_elem * elem_size; i++) { 8136574a04fSJeremy L Thompson CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 8146574a04fSJeremy L Thompson "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 8152b730f8bSJeremy L Thompson } 8162b730f8bSJeremy L Thompson } 8173661185eSjeremylt 81892fe105eSJeremy L Thompson // Copy data 81907d5dec1SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points)); 82007d5dec1SJeremy L Thompson num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size); 821f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostCeedIntArray(offsets, copy_mode, num_offsets, &impl->offsets_owned, &impl->offsets_borrowed, &impl->offsets)); 822fcbe8c06SSebastian Grimberg 823fcbe8c06SSebastian Grimberg // Orientation data 824fcbe8c06SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 8250305e208SSebastian Grimberg CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 826f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostBoolArray(orients, copy_mode, num_offsets, &impl->orients_owned, &impl->orients_borrowed, &impl->orients)); 827fcbe8c06SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 8280305e208SSebastian Grimberg CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 829f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostCeedInt8Array(curl_orients, copy_mode, 3 * num_offsets, &impl->curl_orients_owned, &impl->curl_orients_borrowed, 830f5d1e504SJeremy L Thompson &impl->curl_orients)); 831fcbe8c06SSebastian Grimberg } 83292fe105eSJeremy L Thompson } 833fe2413ffSjeremylt 834ad70ee2cSJeremy L Thompson // Set apply function based upon num_comp, block_size, and comp_stride 835ad70ee2cSJeremy L Thompson CeedInt index = -1; 836ad70ee2cSJeremy L Thompson 837ad70ee2cSJeremy L Thompson if (block_size < 10) index = 100 * num_comp + 10 * block_size + (comp_stride == 1); 838ad70ee2cSJeremy L Thompson switch (index) { 839d979a051Sjeremylt case 110: 840d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 841d979a051Sjeremylt break; 842d979a051Sjeremylt case 111: 843d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 844d979a051Sjeremylt break; 845d979a051Sjeremylt case 180: 846d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 847d979a051Sjeremylt break; 848d979a051Sjeremylt case 181: 849d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 850d979a051Sjeremylt break; 851d979a051Sjeremylt case 310: 852d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 853d979a051Sjeremylt break; 854d979a051Sjeremylt case 311: 855d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 856d979a051Sjeremylt break; 857d979a051Sjeremylt case 380: 858d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 859d979a051Sjeremylt break; 860d979a051Sjeremylt case 381: 861d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 862d979a051Sjeremylt break; 863bf4d1581Sjeremylt // LCOV_EXCL_START 864cf415216SJeremy L Thompson case 410: 865cf415216SJeremy L Thompson impl->Apply = CeedElemRestrictionApply_Ref_410; 866cf415216SJeremy L Thompson break; 867cf415216SJeremy L Thompson case 411: 868cf415216SJeremy L Thompson impl->Apply = CeedElemRestrictionApply_Ref_411; 869cf415216SJeremy L Thompson break; 870cf415216SJeremy L Thompson case 480: 871cf415216SJeremy L Thompson impl->Apply = CeedElemRestrictionApply_Ref_480; 872cf415216SJeremy L Thompson break; 873cf415216SJeremy L Thompson case 481: 874cf415216SJeremy L Thompson impl->Apply = CeedElemRestrictionApply_Ref_481; 875cf415216SJeremy L Thompson break; 876d979a051Sjeremylt case 510: 877d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 878d979a051Sjeremylt break; 879bf4d1581Sjeremylt // LCOV_EXCL_STOP 880d979a051Sjeremylt case 511: 881d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 882d979a051Sjeremylt break; 883bf4d1581Sjeremylt // LCOV_EXCL_START 884d979a051Sjeremylt case 580: 885d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 886d979a051Sjeremylt break; 887bf4d1581Sjeremylt // LCOV_EXCL_STOP 888d979a051Sjeremylt case 581: 889d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 890d979a051Sjeremylt break; 891d979a051Sjeremylt default: 892d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 893d979a051Sjeremylt break; 894d979a051Sjeremylt } 895dce49693SSebastian Grimberg 896dce49693SSebastian Grimberg // Register backend functions 897dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Ref)); 898dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 899dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref)); 900dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_POINTS) { 901dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Ref)); 902dce49693SSebastian Grimberg } 903dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 904dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 905dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 906dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 907dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Ref)); 9089bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 909e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 91021617c04Sjeremylt } 911fc0567d9Srezgarshakeri 912fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 913