13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 321617c04Sjeremylt // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 521617c04Sjeremylt // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 721617c04Sjeremylt 849aac155SJeremy L Thompson #include <ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 103d576824SJeremy L Thompson #include <stdbool.h> 113d576824SJeremy L Thompson #include <string.h> 122b730f8bSJeremy L Thompson 1321617c04Sjeremylt #include "ceed-ref.h" 1421617c04Sjeremylt 15f10650afSjeremylt //------------------------------------------------------------------------------ 16f10650afSjeremylt // Core ElemRestriction Apply Code 17f10650afSjeremylt //------------------------------------------------------------------------------ 182b730f8bSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 19*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, 20*3bdd4e5aSSebastian Grimberg CeedVector v, CeedRequest *request) { 214ce2993fSjeremylt CeedElemRestriction_Ref *impl; 222b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2321617c04Sjeremylt const CeedScalar *uu; 2421617c04Sjeremylt CeedScalar *vv; 25d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 262b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 272b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 28d1d35e2fSjeremylt v_offset = start * blk_size * elem_size * num_comp; 2921617c04Sjeremylt 30b435c5a6Srezgarshakeri bool is_oriented; 312b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionIsOriented(r, &is_oriented)); 322b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 339c774eddSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 349c774eddSJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 352b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 369c774eddSJeremy L Thompson } else { 379c774eddSJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 382b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 399c774eddSJeremy L Thompson } 407f90ec76Sjeremylt // Restriction from L-vector to E-vector 4121617c04Sjeremylt // Perform: v = r * u 42d1d35e2fSjeremylt if (t_mode == CEED_NOTRANSPOSE) { 43d979a051Sjeremylt // No offsets provided, Identity Restriction 44d979a051Sjeremylt if (!impl->offsets) { 45d1d35e2fSjeremylt bool has_backend_strides; 462b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 47d1d35e2fSjeremylt if (has_backend_strides) { 48d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 497f90ec76Sjeremylt // This if branch is left separate to allow better inlining 502b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 512b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 522b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 532b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 542b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 552b730f8bSJeremy L Thompson uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 562b730f8bSJeremy L Thompson } 572b730f8bSJeremy L Thompson } 582b730f8bSJeremy L Thompson } 592b730f8bSJeremy L Thompson } 607f90ec76Sjeremylt } else { 617f90ec76Sjeremylt // User provided strides 627f90ec76Sjeremylt CeedInt strides[3]; 632b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 642b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 652b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 662b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 672b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 682b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 692b730f8bSJeremy L Thompson uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 702b730f8bSJeremy L Thompson } 712b730f8bSJeremy L Thompson } 722b730f8bSJeremy L Thompson } 732b730f8bSJeremy L Thompson } 747509a596Sjeremylt } 7521617c04Sjeremylt } else { 76d979a051Sjeremylt // Offsets provided, standard or blocked restriction 77d1d35e2fSjeremylt // vv has shape [elem_size, num_comp, num_elem], row-major 78d1d35e2fSjeremylt // uu has shape [nnodes, num_comp] 792b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 802b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 812b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 82f30b1135SSebastian Grimberg if (!use_orient || !impl->orient) { 83f30b1135SSebastian Grimberg // Unsigned restriction 84*3bdd4e5aSSebastian Grimberg vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = uu[impl->offsets[i + elem_size * e] + k * comp_stride]; 85f30b1135SSebastian Grimberg } else { 86f30b1135SSebastian Grimberg // Signed restriction 87f30b1135SSebastian Grimberg vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 88f30b1135SSebastian Grimberg uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (impl->orient[i + elem_size * e] ? -1.0 : 1.0); 89f30b1135SSebastian Grimberg } 902b730f8bSJeremy L Thompson } 912b730f8bSJeremy L Thompson } 922b730f8bSJeremy L Thompson } 93b435c5a6Srezgarshakeri } 9421617c04Sjeremylt } else { 957f90ec76Sjeremylt // Restriction from E-vector to L-vector 968d94b059Sjeremylt // Performing v += r^T * u 97d979a051Sjeremylt // No offsets provided, Identity Restriction 98d979a051Sjeremylt if (!impl->offsets) { 99d1d35e2fSjeremylt bool has_backend_strides; 1002b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 101d1d35e2fSjeremylt if (has_backend_strides) { 102d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1037f90ec76Sjeremylt // This if brach is left separate to allow better inlining 1042b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 1052b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1062b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 1072b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 1082b730f8bSJeremy L Thompson vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 1092b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 1102b730f8bSJeremy L Thompson } 1112b730f8bSJeremy L Thompson } 1122b730f8bSJeremy L Thompson } 1132b730f8bSJeremy L Thompson } 1147f90ec76Sjeremylt } else { 1157f90ec76Sjeremylt // User provided strides 1167f90ec76Sjeremylt CeedInt strides[3]; 1172b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 1182b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 1192b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1202b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 1212b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 1222b730f8bSJeremy L Thompson vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 1232b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 1242b730f8bSJeremy L Thompson } 1252b730f8bSJeremy L Thompson } 1262b730f8bSJeremy L Thompson } 1272b730f8bSJeremy L Thompson } 128523b8ea0Sjeremylt } 12921617c04Sjeremylt } else { 130d979a051Sjeremylt // Offsets provided, standard or blocked restriction 131d1d35e2fSjeremylt // uu has shape [elem_size, num_comp, num_elem] 132d1d35e2fSjeremylt // vv has shape [nnodes, num_comp] 1332b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 1342b730f8bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 1352b730f8bSJeremy L Thompson for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 1368d94b059Sjeremylt // Iteration bound set to discard padding elements 1372b730f8bSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 138f30b1135SSebastian Grimberg if (!use_orient || !impl->orient) { 139f30b1135SSebastian Grimberg // Unsigned restriction 140*3bdd4e5aSSebastian Grimberg vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset]; 141f30b1135SSebastian Grimberg } else { 142f30b1135SSebastian Grimberg // Signed restriction 143f30b1135SSebastian Grimberg vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 144f30b1135SSebastian Grimberg uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (impl->orient[j + e * elem_size] ? -1.0 : 1.0); 145f30b1135SSebastian Grimberg } 14621617c04Sjeremylt } 147b435c5a6Srezgarshakeri } 1482b730f8bSJeremy L Thompson } 1492b730f8bSJeremy L Thompson } 1502b730f8bSJeremy L Thompson } 1512b730f8bSJeremy L Thompson } 1522b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 1532b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 1542b730f8bSJeremy L Thompson if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 155e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 15621617c04Sjeremylt } 15721617c04Sjeremylt 158f10650afSjeremylt //------------------------------------------------------------------------------ 159f10650afSjeremylt // ElemRestriction Apply - Common Sizes 160f10650afSjeremylt //------------------------------------------------------------------------------ 1612b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 162*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 163*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 164f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orient, t_mode, u, v, request); 165d979a051Sjeremylt } 166d979a051Sjeremylt 1672b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 168*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 169*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 170f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, use_orient, t_mode, u, v, request); 1714d2a38eeSjeremylt } 1724d2a38eeSjeremylt 1732b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 174*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 175*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 176f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orient, t_mode, u, v, request); 1779c36149bSjeremylt } 1789c36149bSjeremylt 1792b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 180*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 181*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 182f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, use_orient, t_mode, u, v, request); 1839c36149bSjeremylt } 1849c36149bSjeremylt 1852b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 186*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 187*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 188f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orient, t_mode, u, v, request); 189d979a051Sjeremylt } 190d979a051Sjeremylt 1912b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 192*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 193*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 194f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, use_orient, t_mode, u, v, request); 195d979a051Sjeremylt } 196d979a051Sjeremylt 1972b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 198*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 199*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 200f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orient, t_mode, u, v, request); 201d979a051Sjeremylt } 202d979a051Sjeremylt 2032b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 204*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 205*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 206f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, use_orient, t_mode, u, v, request); 207d979a051Sjeremylt } 208d979a051Sjeremylt 209bf4d1581Sjeremylt // LCOV_EXCL_START 2102b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 211*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 212*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 213f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orient, t_mode, u, v, request); 214d979a051Sjeremylt } 215bf4d1581Sjeremylt // LCOV_EXCL_STOP 216d979a051Sjeremylt 2172b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 218*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 219*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 220f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, use_orient, t_mode, u, v, request); 221d979a051Sjeremylt } 222d979a051Sjeremylt 223bf4d1581Sjeremylt // LCOV_EXCL_START 2242b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 225*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 226*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 227f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orient, t_mode, u, v, request); 228d979a051Sjeremylt } 229bf4d1581Sjeremylt // LCOV_EXCL_STOP 230d979a051Sjeremylt 2312b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 232*3bdd4e5aSSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 233*3bdd4e5aSSebastian Grimberg CeedRequest *request) { 234f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, use_orient, t_mode, u, v, request); 2354d2a38eeSjeremylt } 2364d2a38eeSjeremylt 237f10650afSjeremylt //------------------------------------------------------------------------------ 238f10650afSjeremylt // ElemRestriction Apply 239f10650afSjeremylt //------------------------------------------------------------------------------ 2402b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 241d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 2422b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 2432b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 2442b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 2452b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 2467509a596Sjeremylt CeedElemRestriction_Ref *impl; 2472b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2484d2a38eeSjeremylt 249f30b1135SSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, true, t_mode, u, v, request); 250f30b1135SSebastian Grimberg } 251f30b1135SSebastian Grimberg 252f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 253f30b1135SSebastian Grimberg // ElemRestriction Apply Unsigned 254f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 255f30b1135SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 256f30b1135SSebastian Grimberg CeedInt num_blk, blk_size, num_comp, comp_stride; 257f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 258f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 259f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 260f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 261f30b1135SSebastian Grimberg CeedElemRestriction_Ref *impl; 262f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 263f30b1135SSebastian Grimberg 264f30b1135SSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, false, t_mode, u, v, request); 2659c36149bSjeremylt } 266be9261b7Sjeremylt 267f10650afSjeremylt //------------------------------------------------------------------------------ 268f10650afSjeremylt // ElemRestriction Apply Block 269f10650afSjeremylt //------------------------------------------------------------------------------ 2702b730f8bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 271074cb416Sjeremylt CeedRequest *request) { 272d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 2732b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 2742b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 2752b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 2767509a596Sjeremylt CeedElemRestriction_Ref *impl; 2772b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2784d2a38eeSjeremylt 279f30b1135SSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, true, t_mode, u, v, request); 2809c36149bSjeremylt } 281be9261b7Sjeremylt 282f10650afSjeremylt //------------------------------------------------------------------------------ 283bd33150aSjeremylt // ElemRestriction Get Offsets 284bd33150aSjeremylt //------------------------------------------------------------------------------ 2852b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 286bd33150aSjeremylt CeedElemRestriction_Ref *impl; 2872b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 288bd33150aSjeremylt Ceed ceed; 2892b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 290bd33150aSjeremylt 2916574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 292bd33150aSjeremylt 293bd33150aSjeremylt *offsets = impl->offsets; 294e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 295bd33150aSjeremylt } 296bd33150aSjeremylt 297bd33150aSjeremylt //------------------------------------------------------------------------------ 298f10650afSjeremylt // ElemRestriction Destroy 299f10650afSjeremylt //------------------------------------------------------------------------------ 30021617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 301fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 3022b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 30321617c04Sjeremylt 3042b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->offsets_allocated)); 3052b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->orient_allocated)); 3062b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 307e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 30821617c04Sjeremylt } 30921617c04Sjeremylt 310f10650afSjeremylt //------------------------------------------------------------------------------ 311f10650afSjeremylt // ElemRestriction Create 312f10650afSjeremylt //------------------------------------------------------------------------------ 3132b730f8bSJeremy L Thompson int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, CeedElemRestriction r) { 31421617c04Sjeremylt CeedElemRestriction_Ref *impl; 315d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 3162b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 3172b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 3182b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 3192b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 3202b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 3212b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 3224ce2993fSjeremylt Ceed ceed; 3232b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 32421617c04Sjeremylt 3256574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 3262b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 3273661185eSjeremylt 32892fe105eSJeremy L Thompson // Offsets data 329d1d35e2fSjeremylt bool is_strided; 3302b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionIsStrided(r, &is_strided)); 331d1d35e2fSjeremylt if (!is_strided) { 33292fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 333d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 334d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 335d1d35e2fSjeremylt curr_ceed = parent_ceed; 3362b730f8bSJeremy L Thompson CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 3373661185eSjeremylt } 3383661185eSjeremylt const char *resource; 3392b730f8bSJeremy L Thompson CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 3402b730f8bSJeremy L Thompson if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 341d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 342e79b91d9SJeremy L Thompson CeedSize l_size; 3432b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 3443661185eSjeremylt 3452b730f8bSJeremy L Thompson for (CeedInt i = 0; i < num_elem * elem_size; i++) { 3466574a04fSJeremy L Thompson CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 3476574a04fSJeremy L Thompson "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 3482b730f8bSJeremy L Thompson } 3492b730f8bSJeremy L Thompson } 3503661185eSjeremylt 35192fe105eSJeremy L Thompson // Copy data 352d1d35e2fSjeremylt switch (copy_mode) { 35321617c04Sjeremylt case CEED_COPY_VALUES: 3542b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 3552b730f8bSJeremy L Thompson memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 356d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 35721617c04Sjeremylt break; 35821617c04Sjeremylt case CEED_OWN_POINTER: 359d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 360d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 36121617c04Sjeremylt break; 36221617c04Sjeremylt case CEED_USE_POINTER: 363d979a051Sjeremylt impl->offsets = offsets; 36421617c04Sjeremylt } 36592fe105eSJeremy L Thompson } 366fe2413ffSjeremylt 3672b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 368d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 3692b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 3702b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 371f30b1135SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 3722b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 3732b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 3742b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 375d979a051Sjeremylt 376d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 377d979a051Sjeremylt CeedInt idx = -1; 3782b730f8bSJeremy L Thompson if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 379d979a051Sjeremylt switch (idx) { 380d979a051Sjeremylt case 110: 381d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 382d979a051Sjeremylt break; 383d979a051Sjeremylt case 111: 384d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 385d979a051Sjeremylt break; 386d979a051Sjeremylt case 180: 387d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 388d979a051Sjeremylt break; 389d979a051Sjeremylt case 181: 390d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 391d979a051Sjeremylt break; 392d979a051Sjeremylt case 310: 393d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 394d979a051Sjeremylt break; 395d979a051Sjeremylt case 311: 396d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 397d979a051Sjeremylt break; 398d979a051Sjeremylt case 380: 399d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 400d979a051Sjeremylt break; 401d979a051Sjeremylt case 381: 402d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 403d979a051Sjeremylt break; 404bf4d1581Sjeremylt // LCOV_EXCL_START 405d979a051Sjeremylt case 510: 406d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 407d979a051Sjeremylt break; 408bf4d1581Sjeremylt // LCOV_EXCL_STOP 409d979a051Sjeremylt case 511: 410d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 411d979a051Sjeremylt break; 412bf4d1581Sjeremylt // LCOV_EXCL_START 413d979a051Sjeremylt case 580: 414d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 415d979a051Sjeremylt break; 416bf4d1581Sjeremylt // LCOV_EXCL_STOP 417d979a051Sjeremylt case 581: 418d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 419d979a051Sjeremylt break; 420d979a051Sjeremylt default: 421d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 422d979a051Sjeremylt break; 423d979a051Sjeremylt } 424d979a051Sjeremylt 425e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 42621617c04Sjeremylt } 427fc0567d9Srezgarshakeri 428fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 429fc0567d9Srezgarshakeri // ElemRestriction Create Oriented 430fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 4312b730f8bSJeremy L Thompson int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orient, 432fc0567d9Srezgarshakeri CeedElemRestriction r) { 433fc0567d9Srezgarshakeri // Set up for normal restriction with explicit offsets. This sets up dispatch to 434fc0567d9Srezgarshakeri // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation. 435f30b1135SSebastian Grimberg CeedElemRestriction_Ref *impl; 436f30b1135SSebastian Grimberg CeedInt num_elem, elem_size; 4372b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r)); 4382b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 4392b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 4402b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 441f30b1135SSebastian Grimberg 442f30b1135SSebastian Grimberg // Copy data 443fc0567d9Srezgarshakeri switch (copy_mode) { 444fc0567d9Srezgarshakeri case CEED_COPY_VALUES: 4452b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orient_allocated)); 4462b730f8bSJeremy L Thompson memcpy(impl->orient_allocated, orient, num_elem * elem_size * sizeof(orient[0])); 447fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 448fc0567d9Srezgarshakeri break; 449fc0567d9Srezgarshakeri case CEED_OWN_POINTER: 450fc0567d9Srezgarshakeri impl->orient_allocated = (bool *)orient; 451fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 452fc0567d9Srezgarshakeri break; 453fc0567d9Srezgarshakeri case CEED_USE_POINTER: 454fc0567d9Srezgarshakeri impl->orient = orient; 455fc0567d9Srezgarshakeri } 456f30b1135SSebastian Grimberg 457fc0567d9Srezgarshakeri return CEED_ERROR_SUCCESS; 458fc0567d9Srezgarshakeri } 4592a86cc9dSSebastian Grimberg 460f10650afSjeremylt //------------------------------------------------------------------------------ 461