13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 321617c04Sjeremylt // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 521617c04Sjeremylt // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 721617c04Sjeremylt 849aac155SJeremy L Thompson #include <ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 103d576824SJeremy L Thompson #include <stdbool.h> 11fcbe8c06SSebastian Grimberg #include <stdlib.h> 123d576824SJeremy L Thompson #include <string.h> 132b730f8bSJeremy L Thompson 1421617c04Sjeremylt #include "ceed-ref.h" 1521617c04Sjeremylt 16f10650afSjeremylt //------------------------------------------------------------------------------ 17f10650afSjeremylt // Core ElemRestriction Apply Code 18f10650afSjeremylt //------------------------------------------------------------------------------ 190c73c039SSebastian Grimberg static inline int CeedElemRestrictionApplyNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 20*7c1dbaffSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, CeedVector v, 21*7c1dbaffSSebastian Grimberg CeedRequest *request) { 224ce2993fSjeremylt CeedElemRestriction_Ref *impl; 232b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2421617c04Sjeremylt const CeedScalar *uu; 2521617c04Sjeremylt CeedScalar *vv; 26d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 272b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 282b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 29d1d35e2fSjeremylt v_offset = start * blk_size * elem_size * num_comp; 30fcbe8c06SSebastian Grimberg CeedRestrictionType rstr_type; 31fcbe8c06SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 3221617c04Sjeremylt 337f90ec76Sjeremylt // Restriction from L-vector to E-vector 3421617c04Sjeremylt // Perform: v = r * u 35fcbe8c06SSebastian Grimberg // vv has shape [elem_size, num_comp, num_elem], row-major 36fcbe8c06SSebastian Grimberg // uu has shape [nnodes, num_comp] 370c73c039SSebastian Grimberg // Overwrite for notranspose mode 380c73c039SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 390c73c039SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 40fcbe8c06SSebastian Grimberg switch (rstr_type) { 41fcbe8c06SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 42d979a051Sjeremylt // No offsets provided, Identity Restriction 43d1d35e2fSjeremylt bool has_backend_strides; 442b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 45d1d35e2fSjeremylt if (has_backend_strides) { 46d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 477f90ec76Sjeremylt // This if branch is left separate to allow better inlining 482b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 492b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 502b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 512b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 522b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 532b730f8bSJeremy L Thompson uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 542b730f8bSJeremy L Thompson } 552b730f8bSJeremy L Thompson } 562b730f8bSJeremy L Thompson } 572b730f8bSJeremy L Thompson } 587f90ec76Sjeremylt } else { 597f90ec76Sjeremylt // User provided strides 607f90ec76Sjeremylt CeedInt strides[3]; 612b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 622b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 632b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 642b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 652b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 662b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 672b730f8bSJeremy L Thompson uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 682b730f8bSJeremy L Thompson } 692b730f8bSJeremy L Thompson } 702b730f8bSJeremy L Thompson } 712b730f8bSJeremy L Thompson } 727509a596Sjeremylt } 73fcbe8c06SSebastian Grimberg } break; 740c73c039SSebastian Grimberg case CEED_RESTRICTION_DEFAULT: { 75fcbe8c06SSebastian Grimberg // Default restriction with offsets 762b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 772b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 782b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 790c73c039SSebastian Grimberg vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 80fcbe8c06SSebastian Grimberg } 81fcbe8c06SSebastian Grimberg } 82fcbe8c06SSebastian Grimberg } 830c73c039SSebastian Grimberg } break; 840c73c039SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 85fcbe8c06SSebastian Grimberg // Restriction with orientations 86fcbe8c06SSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 87fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 88fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 890c73c039SSebastian Grimberg vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = 90*7c1dbaffSSebastian Grimberg uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 91fcbe8c06SSebastian Grimberg } 92fcbe8c06SSebastian Grimberg } 93fcbe8c06SSebastian Grimberg } 940c73c039SSebastian Grimberg } break; 950c73c039SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 9677d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 97fcbe8c06SSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 98fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 990c73c039SSebastian Grimberg CeedInt n = 0; 1000c73c039SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 1010c73c039SSebastian Grimberg vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 1020c73c039SSebastian Grimberg uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 103*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 1040c73c039SSebastian Grimberg uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 105*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]; 1060c73c039SSebastian Grimberg } 1070c73c039SSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 1080c73c039SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 1090c73c039SSebastian Grimberg vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 1100c73c039SSebastian Grimberg uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 111*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] + 1120c73c039SSebastian Grimberg uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 113*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 1140c73c039SSebastian Grimberg uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 115*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]; 1160c73c039SSebastian Grimberg } 1170c73c039SSebastian Grimberg } 1180c73c039SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 1190c73c039SSebastian Grimberg vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 1200c73c039SSebastian Grimberg uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 121*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] + 1220c73c039SSebastian Grimberg uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 123*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]; 1242b730f8bSJeremy L Thompson } 1252b730f8bSJeremy L Thompson } 1262b730f8bSJeremy L Thompson } 1270c73c039SSebastian Grimberg } break; 128b435c5a6Srezgarshakeri } 1290c73c039SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 1300c73c039SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 1310c73c039SSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 1320c73c039SSebastian Grimberg return CEED_ERROR_SUCCESS; 133fcbe8c06SSebastian Grimberg } 1340c73c039SSebastian Grimberg 1350c73c039SSebastian Grimberg //------------------------------------------------------------------------------ 136*7c1dbaffSSebastian Grimberg // Core Unsigned ElemRestriction Apply Code 1370c73c039SSebastian Grimberg //------------------------------------------------------------------------------ 138*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 139*7c1dbaffSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, 1400c73c039SSebastian Grimberg CeedVector v, CeedRequest *request) { 1410c73c039SSebastian Grimberg Ceed ceed; 1420c73c039SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 1430c73c039SSebastian Grimberg CeedElemRestriction_Ref *impl; 1440c73c039SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 1450c73c039SSebastian Grimberg const CeedScalar *uu; 1460c73c039SSebastian Grimberg CeedScalar *vv; 1470c73c039SSebastian Grimberg CeedInt num_elem, elem_size, v_offset; 1480c73c039SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 1490c73c039SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 1500c73c039SSebastian Grimberg v_offset = start * blk_size * elem_size * num_comp; 1510c73c039SSebastian Grimberg CeedRestrictionType rstr_type; 1520c73c039SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 1530c73c039SSebastian Grimberg 154*7c1dbaffSSebastian Grimberg // Restriction from L-vector to E-vector 155*7c1dbaffSSebastian Grimberg // Perform: v = r * u 156*7c1dbaffSSebastian Grimberg // vv has shape [elem_size, num_comp, num_elem], row-major 157*7c1dbaffSSebastian Grimberg // uu has shape [nnodes, num_comp] 158*7c1dbaffSSebastian Grimberg // Overwrite for notranspose mode 159*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 160*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 161*7c1dbaffSSebastian Grimberg switch (rstr_type) { 162*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 163*7c1dbaffSSebastian Grimberg // Restriction with orientations 164*7c1dbaffSSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 165*7c1dbaffSSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 166*7c1dbaffSSebastian Grimberg CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 167*7c1dbaffSSebastian Grimberg vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 168*7c1dbaffSSebastian Grimberg } 169*7c1dbaffSSebastian Grimberg } 170*7c1dbaffSSebastian Grimberg } 171*7c1dbaffSSebastian Grimberg } break; 172*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 173*7c1dbaffSSebastian Grimberg // Restriction with tridiagonal transformation 174*7c1dbaffSSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 175*7c1dbaffSSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 176*7c1dbaffSSebastian Grimberg CeedInt n = 0; 177*7c1dbaffSSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 178*7c1dbaffSSebastian Grimberg vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 179*7c1dbaffSSebastian Grimberg uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 180*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 181*7c1dbaffSSebastian Grimberg uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 182*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]); 183*7c1dbaffSSebastian Grimberg } 184*7c1dbaffSSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 185*7c1dbaffSSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 186*7c1dbaffSSebastian Grimberg vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 187*7c1dbaffSSebastian Grimberg uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 188*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) + 189*7c1dbaffSSebastian Grimberg uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 190*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 191*7c1dbaffSSebastian Grimberg uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 192*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]); 193*7c1dbaffSSebastian Grimberg } 194*7c1dbaffSSebastian Grimberg } 195*7c1dbaffSSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 196*7c1dbaffSSebastian Grimberg vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 197*7c1dbaffSSebastian Grimberg uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 198*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) + 199*7c1dbaffSSebastian Grimberg uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 200*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]); 201*7c1dbaffSSebastian Grimberg } 202*7c1dbaffSSebastian Grimberg } 203*7c1dbaffSSebastian Grimberg } 204*7c1dbaffSSebastian Grimberg } break; 205*7c1dbaffSSebastian Grimberg // LCOV_EXCL_START 206*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 207*7c1dbaffSSebastian Grimberg return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported"); 208*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_DEFAULT: 209*7c1dbaffSSebastian Grimberg return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported"); 210*7c1dbaffSSebastian Grimberg // LCOV_EXCL_STOP 211*7c1dbaffSSebastian Grimberg } 212*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 213*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 214*7c1dbaffSSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 215*7c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 216*7c1dbaffSSebastian Grimberg } 217*7c1dbaffSSebastian Grimberg 218*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 219*7c1dbaffSSebastian Grimberg // Core Unoriented ElemRestriction Apply Code 220*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 221*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApplyUnorientedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 222*7c1dbaffSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, 223*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 224*7c1dbaffSSebastian Grimberg Ceed ceed; 225*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 226*7c1dbaffSSebastian Grimberg CeedElemRestriction_Ref *impl; 227*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 228*7c1dbaffSSebastian Grimberg const CeedScalar *uu; 229*7c1dbaffSSebastian Grimberg CeedScalar *vv; 230*7c1dbaffSSebastian Grimberg CeedInt num_elem, elem_size, v_offset; 231*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 232*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 233*7c1dbaffSSebastian Grimberg v_offset = start * blk_size * elem_size * num_comp; 234*7c1dbaffSSebastian Grimberg CeedRestrictionType rstr_type; 235*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 236*7c1dbaffSSebastian Grimberg 237*7c1dbaffSSebastian Grimberg // Restriction from L-vector to E-vector 238*7c1dbaffSSebastian Grimberg // Perform: v = r * u 239*7c1dbaffSSebastian Grimberg // vv has shape [elem_size, num_comp, num_elem], row-major 240*7c1dbaffSSebastian Grimberg // uu has shape [nnodes, num_comp] 241*7c1dbaffSSebastian Grimberg // Overwrite for notranspose mode 242*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 243*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 244*7c1dbaffSSebastian Grimberg switch (rstr_type) { 245*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 246*7c1dbaffSSebastian Grimberg // Restriction with tridiagonal transformation 247*7c1dbaffSSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 248*7c1dbaffSSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 249*7c1dbaffSSebastian Grimberg CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 250*7c1dbaffSSebastian Grimberg vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 251*7c1dbaffSSebastian Grimberg } 252*7c1dbaffSSebastian Grimberg } 253*7c1dbaffSSebastian Grimberg } 254*7c1dbaffSSebastian Grimberg } break; 255*7c1dbaffSSebastian Grimberg // LCOV_EXCL_START 256*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 257*7c1dbaffSSebastian Grimberg return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported"); 258*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_DEFAULT: 259*7c1dbaffSSebastian Grimberg return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported"); 260*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 261*7c1dbaffSSebastian Grimberg return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_ORIENTED not supported"); 262*7c1dbaffSSebastian Grimberg // LCOV_EXCL_STOP 263*7c1dbaffSSebastian Grimberg } 264*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 265*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 266*7c1dbaffSSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 267*7c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 268*7c1dbaffSSebastian Grimberg } 269*7c1dbaffSSebastian Grimberg 270*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 271*7c1dbaffSSebastian Grimberg // Core ElemRestriction Apply Transpose Code 272*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 273*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApplyTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 274*7c1dbaffSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, CeedVector v, 275*7c1dbaffSSebastian Grimberg CeedRequest *request) { 276*7c1dbaffSSebastian Grimberg CeedElemRestriction_Ref *impl; 277*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 278*7c1dbaffSSebastian Grimberg const CeedScalar *uu; 279*7c1dbaffSSebastian Grimberg CeedScalar *vv; 280*7c1dbaffSSebastian Grimberg CeedInt num_elem, elem_size, v_offset; 281*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 282*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 283*7c1dbaffSSebastian Grimberg v_offset = start * blk_size * elem_size * num_comp; 284*7c1dbaffSSebastian Grimberg CeedRestrictionType rstr_type; 285*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 286*7c1dbaffSSebastian Grimberg 2877f90ec76Sjeremylt // Restriction from E-vector to L-vector 2888d94b059Sjeremylt // Performing v += r^T * u 2899475e044SSebastian Grimberg // uu has shape [elem_size, num_comp, num_elem], row-major 290fcbe8c06SSebastian Grimberg // vv has shape [nnodes, num_comp] 2910c73c039SSebastian Grimberg // Sum into for transpose mode 2920c73c039SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 2930c73c039SSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 294fcbe8c06SSebastian Grimberg switch (rstr_type) { 295fcbe8c06SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 296d979a051Sjeremylt // No offsets provided, Identity Restriction 297d1d35e2fSjeremylt bool has_backend_strides; 2982b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 299d1d35e2fSjeremylt if (has_backend_strides) { 300d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 3017f90ec76Sjeremylt // This if brach is left separate to allow better inlining 3022b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 3032b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 3042b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 3052b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 3062b730f8bSJeremy L Thompson vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 3072b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 3082b730f8bSJeremy L Thompson } 3092b730f8bSJeremy L Thompson } 3102b730f8bSJeremy L Thompson } 3112b730f8bSJeremy L Thompson } 3127f90ec76Sjeremylt } else { 3137f90ec76Sjeremylt // User provided strides 3147f90ec76Sjeremylt CeedInt strides[3]; 3152b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 3162b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 3172b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 3182b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 3192b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 3202b730f8bSJeremy L Thompson vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 3212b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 3222b730f8bSJeremy L Thompson } 3232b730f8bSJeremy L Thompson } 3242b730f8bSJeremy L Thompson } 3252b730f8bSJeremy L Thompson } 326523b8ea0Sjeremylt } 327fcbe8c06SSebastian Grimberg } break; 3280c73c039SSebastian Grimberg case CEED_RESTRICTION_DEFAULT: { 329fcbe8c06SSebastian Grimberg // Default restriction with offsets 3302b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 3312b730f8bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 3322b730f8bSJeremy L Thompson for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 3338d94b059Sjeremylt // Iteration bound set to discard padding elements 3342b730f8bSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 3350c73c039SSebastian Grimberg vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset]; 336fcbe8c06SSebastian Grimberg } 337fcbe8c06SSebastian Grimberg } 338fcbe8c06SSebastian Grimberg } 339fcbe8c06SSebastian Grimberg } 3400c73c039SSebastian Grimberg } break; 3410c73c039SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 342fcbe8c06SSebastian Grimberg // Restriction with orientations 343fcbe8c06SSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 344fcbe8c06SSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 345fcbe8c06SSebastian Grimberg for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 346fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 347fcbe8c06SSebastian Grimberg for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 348f30b1135SSebastian Grimberg vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 349*7c1dbaffSSebastian Grimberg uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 350fcbe8c06SSebastian Grimberg } 351fcbe8c06SSebastian Grimberg } 352fcbe8c06SSebastian Grimberg } 353fcbe8c06SSebastian Grimberg } 3540c73c039SSebastian Grimberg } break; 3550c73c039SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 35677d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 357fcbe8c06SSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 358fcbe8c06SSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 359fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 3600c73c039SSebastian Grimberg CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0; 3610c73c039SSebastian Grimberg for (CeedInt j = 0; j < blk_end; j++) { 3620c73c039SSebastian Grimberg vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 3630c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 364*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 3650c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 366*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]; 3670c73c039SSebastian Grimberg } 3680c73c039SSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 3690c73c039SSebastian Grimberg for (CeedInt j = 0; j < blk_end; j++) { 3700c73c039SSebastian Grimberg vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 3710c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 372*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] + 3730c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 374*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 3750c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 376*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]; 3770c73c039SSebastian Grimberg } 3780c73c039SSebastian Grimberg } 3790c73c039SSebastian Grimberg for (CeedInt j = 0; j < blk_end; j++) { 3800c73c039SSebastian Grimberg vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 3810c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 382*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] + 3830c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 384*7c1dbaffSSebastian Grimberg impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]; 38521617c04Sjeremylt } 386b435c5a6Srezgarshakeri } 3872b730f8bSJeremy L Thompson } 3880c73c039SSebastian Grimberg } break; 3892b730f8bSJeremy L Thompson } 3902b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 3912b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 3922b730f8bSJeremy L Thompson if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 393e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 39421617c04Sjeremylt } 39521617c04Sjeremylt 396f10650afSjeremylt //------------------------------------------------------------------------------ 397*7c1dbaffSSebastian Grimberg // Core Unsigned ElemRestriction Apply Transpose Code 398*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 399*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 400*7c1dbaffSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, 401*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 402*7c1dbaffSSebastian Grimberg Ceed ceed; 403*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 404*7c1dbaffSSebastian Grimberg CeedElemRestriction_Ref *impl; 405*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 406*7c1dbaffSSebastian Grimberg const CeedScalar *uu; 407*7c1dbaffSSebastian Grimberg CeedScalar *vv; 408*7c1dbaffSSebastian Grimberg CeedInt num_elem, elem_size, v_offset; 409*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 410*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 411*7c1dbaffSSebastian Grimberg v_offset = start * blk_size * elem_size * num_comp; 412*7c1dbaffSSebastian Grimberg CeedRestrictionType rstr_type; 413*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 414*7c1dbaffSSebastian Grimberg 415*7c1dbaffSSebastian Grimberg // Restriction from E-vector to L-vector 416*7c1dbaffSSebastian Grimberg // Performing v += r^T * u 417*7c1dbaffSSebastian Grimberg // uu has shape [elem_size, num_comp, num_elem], row-major 418*7c1dbaffSSebastian Grimberg // vv has shape [nnodes, num_comp] 419*7c1dbaffSSebastian Grimberg // Sum into for transpose mode 420*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 421*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 422*7c1dbaffSSebastian Grimberg switch (rstr_type) { 423*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 424*7c1dbaffSSebastian Grimberg // Restriction with orientations 425*7c1dbaffSSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 426*7c1dbaffSSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 427*7c1dbaffSSebastian Grimberg for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 428*7c1dbaffSSebastian Grimberg // Iteration bound set to discard padding elements 429*7c1dbaffSSebastian Grimberg for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 430*7c1dbaffSSebastian Grimberg vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset]; 431*7c1dbaffSSebastian Grimberg } 432*7c1dbaffSSebastian Grimberg } 433*7c1dbaffSSebastian Grimberg } 434*7c1dbaffSSebastian Grimberg } 435*7c1dbaffSSebastian Grimberg } break; 436*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 437*7c1dbaffSSebastian Grimberg // Restriction with tridiagonal transformation 438*7c1dbaffSSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 439*7c1dbaffSSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 440*7c1dbaffSSebastian Grimberg // Iteration bound set to discard padding elements 441*7c1dbaffSSebastian Grimberg CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0; 442*7c1dbaffSSebastian Grimberg for (CeedInt j = 0; j < blk_end; j++) { 443*7c1dbaffSSebastian Grimberg vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 444*7c1dbaffSSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 445*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 446*7c1dbaffSSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 447*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]); 448*7c1dbaffSSebastian Grimberg } 449*7c1dbaffSSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 450*7c1dbaffSSebastian Grimberg for (CeedInt j = 0; j < blk_end; j++) { 451*7c1dbaffSSebastian Grimberg vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 452*7c1dbaffSSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 453*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) + 454*7c1dbaffSSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 455*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 456*7c1dbaffSSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 457*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]); 458*7c1dbaffSSebastian Grimberg } 459*7c1dbaffSSebastian Grimberg } 460*7c1dbaffSSebastian Grimberg for (CeedInt j = 0; j < blk_end; j++) { 461*7c1dbaffSSebastian Grimberg vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 462*7c1dbaffSSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 463*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) + 464*7c1dbaffSSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 465*7c1dbaffSSebastian Grimberg abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]); 466*7c1dbaffSSebastian Grimberg } 467*7c1dbaffSSebastian Grimberg } 468*7c1dbaffSSebastian Grimberg } 469*7c1dbaffSSebastian Grimberg } break; 470*7c1dbaffSSebastian Grimberg // LCOV_EXCL_START 471*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 472*7c1dbaffSSebastian Grimberg return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported"); 473*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_DEFAULT: 474*7c1dbaffSSebastian Grimberg return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported"); 475*7c1dbaffSSebastian Grimberg // LCOV_EXCL_STOP 476*7c1dbaffSSebastian Grimberg } 477*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 478*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 479*7c1dbaffSSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 480*7c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 481*7c1dbaffSSebastian Grimberg } 482*7c1dbaffSSebastian Grimberg 483*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 484*7c1dbaffSSebastian Grimberg // Core Unoriented ElemRestriction Apply Transpose Code 485*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 486*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApplyUnorientedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 487*7c1dbaffSSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, 488*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 489*7c1dbaffSSebastian Grimberg Ceed ceed; 490*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 491*7c1dbaffSSebastian Grimberg CeedElemRestriction_Ref *impl; 492*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 493*7c1dbaffSSebastian Grimberg const CeedScalar *uu; 494*7c1dbaffSSebastian Grimberg CeedScalar *vv; 495*7c1dbaffSSebastian Grimberg CeedInt num_elem, elem_size, v_offset; 496*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 497*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 498*7c1dbaffSSebastian Grimberg v_offset = start * blk_size * elem_size * num_comp; 499*7c1dbaffSSebastian Grimberg CeedRestrictionType rstr_type; 500*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 501*7c1dbaffSSebastian Grimberg 502*7c1dbaffSSebastian Grimberg // Restriction from E-vector to L-vector 503*7c1dbaffSSebastian Grimberg // Performing v += r^T * u 504*7c1dbaffSSebastian Grimberg // uu has shape [elem_size, num_comp, num_elem], row-major 505*7c1dbaffSSebastian Grimberg // vv has shape [nnodes, num_comp] 506*7c1dbaffSSebastian Grimberg // Sum into for transpose mode 507*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 508*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 509*7c1dbaffSSebastian Grimberg switch (rstr_type) { 510*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 511*7c1dbaffSSebastian Grimberg // Restriction with orientations 512*7c1dbaffSSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 513*7c1dbaffSSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 514*7c1dbaffSSebastian Grimberg for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 515*7c1dbaffSSebastian Grimberg // Iteration bound set to discard padding elements 516*7c1dbaffSSebastian Grimberg for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 517*7c1dbaffSSebastian Grimberg vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset]; 518*7c1dbaffSSebastian Grimberg } 519*7c1dbaffSSebastian Grimberg } 520*7c1dbaffSSebastian Grimberg } 521*7c1dbaffSSebastian Grimberg } 522*7c1dbaffSSebastian Grimberg } break; 523*7c1dbaffSSebastian Grimberg // LCOV_EXCL_START 524*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 525*7c1dbaffSSebastian Grimberg return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported"); 526*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_DEFAULT: 527*7c1dbaffSSebastian Grimberg return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported"); 528*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 529*7c1dbaffSSebastian Grimberg return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_ORIENTED not supported"); 530*7c1dbaffSSebastian Grimberg // LCOV_EXCL_STOP 531*7c1dbaffSSebastian Grimberg } 532*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 533*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 534*7c1dbaffSSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 535*7c1dbaffSSebastian Grimberg return CEED_ERROR_SUCCESS; 536*7c1dbaffSSebastian Grimberg } 537*7c1dbaffSSebastian Grimberg 538*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 539f10650afSjeremylt // ElemRestriction Apply - Common Sizes 540f10650afSjeremylt //------------------------------------------------------------------------------ 541*7c1dbaffSSebastian Grimberg static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 542*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 543*7c1dbaffSSebastian Grimberg CeedVector u, CeedVector v, CeedRequest *request) { 544*7c1dbaffSSebastian Grimberg CeedRestrictionType rstr_type; 545*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 5460c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 547*7c1dbaffSSebastian Grimberg switch (rstr_type) { 548*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 549*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_DEFAULT: 550*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 551*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 552*7c1dbaffSSebastian Grimberg if (use_signs) { 553*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 5540c73c039SSebastian Grimberg } else { 555*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 5560c73c039SSebastian Grimberg } 557*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: 558*7c1dbaffSSebastian Grimberg if (use_signs && use_orients) { 559*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 560*7c1dbaffSSebastian Grimberg } else if (use_orients) { 561*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 562*7c1dbaffSSebastian Grimberg } else { 563*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyUnorientedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 564*7c1dbaffSSebastian Grimberg } 565*7c1dbaffSSebastian Grimberg } 566*7c1dbaffSSebastian Grimberg } else { 567*7c1dbaffSSebastian Grimberg switch (rstr_type) { 568*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_STRIDED: 569*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_DEFAULT: 570*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 571*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_ORIENTED: 572*7c1dbaffSSebastian Grimberg if (use_signs) { 573*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 574*7c1dbaffSSebastian Grimberg } else { 575*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 576*7c1dbaffSSebastian Grimberg } 577*7c1dbaffSSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: 578*7c1dbaffSSebastian Grimberg if (use_signs && use_orients) { 579*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 580*7c1dbaffSSebastian Grimberg } else if (use_orients) { 581*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 582*7c1dbaffSSebastian Grimberg } else { 583*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApplyUnorientedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 584*7c1dbaffSSebastian Grimberg } 585*7c1dbaffSSebastian Grimberg } 586*7c1dbaffSSebastian Grimberg } 587*7c1dbaffSSebastian Grimberg } 588*7c1dbaffSSebastian Grimberg 589*7c1dbaffSSebastian Grimberg static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 590*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 591*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 592*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 593d979a051Sjeremylt } 594d979a051Sjeremylt 5952b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 596*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 597*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 598*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 5994d2a38eeSjeremylt } 6004d2a38eeSjeremylt 6012b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 602*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 603*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 604*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 6059c36149bSjeremylt } 6069c36149bSjeremylt 6072b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 608*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 609*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 610*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 6119c36149bSjeremylt } 6129c36149bSjeremylt 6132b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 614*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 615*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 616*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 617d979a051Sjeremylt } 618d979a051Sjeremylt 6192b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 620*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 621*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 622*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 623d979a051Sjeremylt } 624d979a051Sjeremylt 6252b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 626*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 627*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 628*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 629d979a051Sjeremylt } 630d979a051Sjeremylt 6312b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 632*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 633*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 634*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 635d979a051Sjeremylt } 636d979a051Sjeremylt 637bf4d1581Sjeremylt // LCOV_EXCL_START 6382b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 639*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 640*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 641*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 642d979a051Sjeremylt } 643bf4d1581Sjeremylt // LCOV_EXCL_STOP 644d979a051Sjeremylt 6452b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 646*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 647*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 648*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 649d979a051Sjeremylt } 650d979a051Sjeremylt 651bf4d1581Sjeremylt // LCOV_EXCL_START 6522b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 653*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 654*7c1dbaffSSebastian Grimberg CeedVector v, CeedRequest *request) { 655*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 656d979a051Sjeremylt } 657bf4d1581Sjeremylt // LCOV_EXCL_STOP 658d979a051Sjeremylt 6592b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 660*7c1dbaffSSebastian Grimberg CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 6610c73c039SSebastian Grimberg CeedVector v, CeedRequest *request) { 662*7c1dbaffSSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 6634d2a38eeSjeremylt } 6644d2a38eeSjeremylt 665f10650afSjeremylt //------------------------------------------------------------------------------ 666f10650afSjeremylt // ElemRestriction Apply 667f10650afSjeremylt //------------------------------------------------------------------------------ 6682b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 669d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 6702b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 6712b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 6722b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 6732b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 6747509a596Sjeremylt CeedElemRestriction_Ref *impl; 6752b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 6764d2a38eeSjeremylt 677*7c1dbaffSSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, true, true, u, v, request); 678f30b1135SSebastian Grimberg } 679f30b1135SSebastian Grimberg 680f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 681f30b1135SSebastian Grimberg // ElemRestriction Apply Unsigned 682f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 683f30b1135SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 684f30b1135SSebastian Grimberg CeedInt num_blk, blk_size, num_comp, comp_stride; 685f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 686f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 687f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 688f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 689f30b1135SSebastian Grimberg CeedElemRestriction_Ref *impl; 690f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 691f30b1135SSebastian Grimberg 692*7c1dbaffSSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, true, u, v, request); 693*7c1dbaffSSebastian Grimberg } 694*7c1dbaffSSebastian Grimberg 695*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 696*7c1dbaffSSebastian Grimberg // ElemRestriction Apply Unoriented 697*7c1dbaffSSebastian Grimberg //------------------------------------------------------------------------------ 698*7c1dbaffSSebastian Grimberg static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 699*7c1dbaffSSebastian Grimberg CeedInt num_blk, blk_size, num_comp, comp_stride; 700*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 701*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 702*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 703*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 704*7c1dbaffSSebastian Grimberg CeedElemRestriction_Ref *impl; 705*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 706*7c1dbaffSSebastian Grimberg 707*7c1dbaffSSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, false, u, v, request); 7089c36149bSjeremylt } 709be9261b7Sjeremylt 710f10650afSjeremylt //------------------------------------------------------------------------------ 711f10650afSjeremylt // ElemRestriction Apply Block 712f10650afSjeremylt //------------------------------------------------------------------------------ 7132b730f8bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 714074cb416Sjeremylt CeedRequest *request) { 715d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 7162b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 7172b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 7182b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 7197509a596Sjeremylt CeedElemRestriction_Ref *impl; 7202b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 7214d2a38eeSjeremylt 722*7c1dbaffSSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request); 7239c36149bSjeremylt } 724be9261b7Sjeremylt 725f10650afSjeremylt //------------------------------------------------------------------------------ 726bd33150aSjeremylt // ElemRestriction Get Offsets 727bd33150aSjeremylt //------------------------------------------------------------------------------ 7282b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 729bd33150aSjeremylt CeedElemRestriction_Ref *impl; 7302b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 731bd33150aSjeremylt Ceed ceed; 7322b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 733bd33150aSjeremylt 7346574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 735bd33150aSjeremylt 736bd33150aSjeremylt *offsets = impl->offsets; 737e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 738bd33150aSjeremylt } 739bd33150aSjeremylt 740bd33150aSjeremylt //------------------------------------------------------------------------------ 74177d1c127SSebastian Grimberg // ElemRestriction Get Orientations 74277d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 74377d1c127SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 74477d1c127SSebastian Grimberg CeedElemRestriction_Ref *impl; 74577d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 74677d1c127SSebastian Grimberg Ceed ceed; 74777d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 74877d1c127SSebastian Grimberg 749fcbe8c06SSebastian Grimberg CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 75077d1c127SSebastian Grimberg 75177d1c127SSebastian Grimberg *orients = impl->orients; 75277d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 75377d1c127SSebastian Grimberg } 75477d1c127SSebastian Grimberg 75577d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 75677d1c127SSebastian Grimberg // ElemRestriction Get Curl-Conforming Orientations 75777d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 7580c73c039SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 75977d1c127SSebastian Grimberg CeedElemRestriction_Ref *impl; 76077d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 76177d1c127SSebastian Grimberg Ceed ceed; 76277d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 76377d1c127SSebastian Grimberg 764fcbe8c06SSebastian Grimberg CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 76577d1c127SSebastian Grimberg 76677d1c127SSebastian Grimberg *curl_orients = impl->curl_orients; 76777d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 76877d1c127SSebastian Grimberg } 76977d1c127SSebastian Grimberg 77077d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 771f10650afSjeremylt // ElemRestriction Destroy 772f10650afSjeremylt //------------------------------------------------------------------------------ 77321617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 774fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 7752b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 77621617c04Sjeremylt 7772b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->offsets_allocated)); 77877d1c127SSebastian Grimberg CeedCallBackend(CeedFree(&impl->orients_allocated)); 77977d1c127SSebastian Grimberg CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 7802b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 781e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 78221617c04Sjeremylt } 78321617c04Sjeremylt 784f10650afSjeremylt //------------------------------------------------------------------------------ 785f10650afSjeremylt // ElemRestriction Create 786f10650afSjeremylt //------------------------------------------------------------------------------ 787fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 7880c73c039SSebastian Grimberg const CeedInt8 *curl_orients, CeedElemRestriction r) { 78921617c04Sjeremylt CeedElemRestriction_Ref *impl; 790d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 7912b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 7922b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 7932b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 7942b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 7952b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 7962b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 7974ce2993fSjeremylt Ceed ceed; 7982b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 79921617c04Sjeremylt 8006574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 8012b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 8023661185eSjeremylt 80392fe105eSJeremy L Thompson // Offsets data 804fcbe8c06SSebastian Grimberg CeedRestrictionType rstr_type; 805fcbe8c06SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 806fcbe8c06SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 80792fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 808d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 809d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 810d1d35e2fSjeremylt curr_ceed = parent_ceed; 8112b730f8bSJeremy L Thompson CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 8123661185eSjeremylt } 8133661185eSjeremylt const char *resource; 8142b730f8bSJeremy L Thompson CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 8152b730f8bSJeremy L Thompson if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 816d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 817e79b91d9SJeremy L Thompson CeedSize l_size; 8182b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 8193661185eSjeremylt 8202b730f8bSJeremy L Thompson for (CeedInt i = 0; i < num_elem * elem_size; i++) { 8216574a04fSJeremy L Thompson CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 8226574a04fSJeremy L Thompson "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 8232b730f8bSJeremy L Thompson } 8242b730f8bSJeremy L Thompson } 8253661185eSjeremylt 82692fe105eSJeremy L Thompson // Copy data 827d1d35e2fSjeremylt switch (copy_mode) { 82821617c04Sjeremylt case CEED_COPY_VALUES: 8292b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 8302b730f8bSJeremy L Thompson memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 831d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 83221617c04Sjeremylt break; 83321617c04Sjeremylt case CEED_OWN_POINTER: 834d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 835d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 83621617c04Sjeremylt break; 83721617c04Sjeremylt case CEED_USE_POINTER: 838d979a051Sjeremylt impl->offsets = offsets; 83921617c04Sjeremylt } 840fcbe8c06SSebastian Grimberg 841fcbe8c06SSebastian Grimberg // Orientation data 842fcbe8c06SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 8430305e208SSebastian Grimberg CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 844fcbe8c06SSebastian Grimberg switch (copy_mode) { 845fcbe8c06SSebastian Grimberg case CEED_COPY_VALUES: 846fcbe8c06SSebastian Grimberg CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated)); 847fcbe8c06SSebastian Grimberg memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0])); 848fcbe8c06SSebastian Grimberg impl->orients = impl->orients_allocated; 849fcbe8c06SSebastian Grimberg break; 850fcbe8c06SSebastian Grimberg case CEED_OWN_POINTER: 851fcbe8c06SSebastian Grimberg impl->orients_allocated = (bool *)orients; 852fcbe8c06SSebastian Grimberg impl->orients = impl->orients_allocated; 853fcbe8c06SSebastian Grimberg break; 854fcbe8c06SSebastian Grimberg case CEED_USE_POINTER: 855fcbe8c06SSebastian Grimberg impl->orients = orients; 856fcbe8c06SSebastian Grimberg } 857fcbe8c06SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 8580305e208SSebastian Grimberg CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 859fcbe8c06SSebastian Grimberg switch (copy_mode) { 860fcbe8c06SSebastian Grimberg case CEED_COPY_VALUES: 861fcbe8c06SSebastian Grimberg CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated)); 862fcbe8c06SSebastian Grimberg memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0])); 863fcbe8c06SSebastian Grimberg impl->curl_orients = impl->curl_orients_allocated; 864fcbe8c06SSebastian Grimberg break; 865fcbe8c06SSebastian Grimberg case CEED_OWN_POINTER: 8660c73c039SSebastian Grimberg impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 867fcbe8c06SSebastian Grimberg impl->curl_orients = impl->curl_orients_allocated; 868fcbe8c06SSebastian Grimberg break; 869fcbe8c06SSebastian Grimberg case CEED_USE_POINTER: 870fcbe8c06SSebastian Grimberg impl->curl_orients = curl_orients; 871fcbe8c06SSebastian Grimberg } 872fcbe8c06SSebastian Grimberg } 87392fe105eSJeremy L Thompson } 874fe2413ffSjeremylt 8752b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 876d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 8772b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 8782b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 879f30b1135SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 880*7c1dbaffSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref)); 8812b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 8822b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 88377d1c127SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 88477d1c127SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 8852b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 886d979a051Sjeremylt 887d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 888d979a051Sjeremylt CeedInt idx = -1; 8892b730f8bSJeremy L Thompson if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 890d979a051Sjeremylt switch (idx) { 891d979a051Sjeremylt case 110: 892d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 893d979a051Sjeremylt break; 894d979a051Sjeremylt case 111: 895d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 896d979a051Sjeremylt break; 897d979a051Sjeremylt case 180: 898d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 899d979a051Sjeremylt break; 900d979a051Sjeremylt case 181: 901d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 902d979a051Sjeremylt break; 903d979a051Sjeremylt case 310: 904d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 905d979a051Sjeremylt break; 906d979a051Sjeremylt case 311: 907d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 908d979a051Sjeremylt break; 909d979a051Sjeremylt case 380: 910d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 911d979a051Sjeremylt break; 912d979a051Sjeremylt case 381: 913d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 914d979a051Sjeremylt break; 915bf4d1581Sjeremylt // LCOV_EXCL_START 916d979a051Sjeremylt case 510: 917d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 918d979a051Sjeremylt break; 919bf4d1581Sjeremylt // LCOV_EXCL_STOP 920d979a051Sjeremylt case 511: 921d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 922d979a051Sjeremylt break; 923bf4d1581Sjeremylt // LCOV_EXCL_START 924d979a051Sjeremylt case 580: 925d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 926d979a051Sjeremylt break; 927bf4d1581Sjeremylt // LCOV_EXCL_STOP 928d979a051Sjeremylt case 581: 929d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 930d979a051Sjeremylt break; 931d979a051Sjeremylt default: 932d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 933d979a051Sjeremylt break; 934d979a051Sjeremylt } 935d979a051Sjeremylt 936e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 93721617c04Sjeremylt } 938fc0567d9Srezgarshakeri 939fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 940