13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 321617c04Sjeremylt // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 521617c04Sjeremylt // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 721617c04Sjeremylt 849aac155SJeremy L Thompson #include <ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 103d576824SJeremy L Thompson #include <stdbool.h> 11fcbe8c06SSebastian Grimberg #include <stdlib.h> 123d576824SJeremy L Thompson #include <string.h> 132b730f8bSJeremy L Thompson 1421617c04Sjeremylt #include "ceed-ref.h" 1521617c04Sjeremylt 16f10650afSjeremylt //------------------------------------------------------------------------------ 17f10650afSjeremylt // Core ElemRestriction Apply Code 18f10650afSjeremylt //------------------------------------------------------------------------------ 19*0c73c039SSebastian Grimberg static inline int CeedElemRestrictionApplyNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 20*0c73c039SSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, bool use_orients, CeedVector u, 213bdd4e5aSSebastian Grimberg CeedVector v, CeedRequest *request) { 22*0c73c039SSebastian Grimberg Ceed ceed; 23*0c73c039SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 244ce2993fSjeremylt CeedElemRestriction_Ref *impl; 252b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2621617c04Sjeremylt const CeedScalar *uu; 2721617c04Sjeremylt CeedScalar *vv; 28d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 292b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 302b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 31d1d35e2fSjeremylt v_offset = start * blk_size * elem_size * num_comp; 32fcbe8c06SSebastian Grimberg CeedRestrictionType rstr_type; 33fcbe8c06SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 3421617c04Sjeremylt 357f90ec76Sjeremylt // Restriction from L-vector to E-vector 3621617c04Sjeremylt // Perform: v = r * u 37fcbe8c06SSebastian Grimberg // vv has shape [elem_size, num_comp, num_elem], row-major 38fcbe8c06SSebastian Grimberg // uu has shape [nnodes, num_comp] 39*0c73c039SSebastian Grimberg // Overwrite for notranspose mode 40*0c73c039SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 41*0c73c039SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 42fcbe8c06SSebastian Grimberg switch (rstr_type) { 43fcbe8c06SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 44d979a051Sjeremylt // No offsets provided, Identity Restriction 45d1d35e2fSjeremylt bool has_backend_strides; 462b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 47d1d35e2fSjeremylt if (has_backend_strides) { 48d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 497f90ec76Sjeremylt // This if branch is left separate to allow better inlining 502b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 512b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 522b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 532b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 542b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 552b730f8bSJeremy L Thompson uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 562b730f8bSJeremy L Thompson } 572b730f8bSJeremy L Thompson } 582b730f8bSJeremy L Thompson } 592b730f8bSJeremy L Thompson } 607f90ec76Sjeremylt } else { 617f90ec76Sjeremylt // User provided strides 627f90ec76Sjeremylt CeedInt strides[3]; 632b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 642b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 652b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 662b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 672b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 682b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 692b730f8bSJeremy L Thompson uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 702b730f8bSJeremy L Thompson } 712b730f8bSJeremy L Thompson } 722b730f8bSJeremy L Thompson } 732b730f8bSJeremy L Thompson } 747509a596Sjeremylt } 75fcbe8c06SSebastian Grimberg } break; 76*0c73c039SSebastian Grimberg case CEED_RESTRICTION_DEFAULT: { 77fcbe8c06SSebastian Grimberg // Default restriction with offsets 782b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 792b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 802b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 81*0c73c039SSebastian Grimberg vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 82fcbe8c06SSebastian Grimberg } 83fcbe8c06SSebastian Grimberg } 84fcbe8c06SSebastian Grimberg } 85*0c73c039SSebastian Grimberg } break; 86*0c73c039SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 87fcbe8c06SSebastian Grimberg // Restriction with orientations 88fcbe8c06SSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 89fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 90fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 91*0c73c039SSebastian Grimberg vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = 92*0c73c039SSebastian Grimberg uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (use_orients && impl->orients[i + e * elem_size] ? -1.0 : 1.0); 93fcbe8c06SSebastian Grimberg } 94fcbe8c06SSebastian Grimberg } 95fcbe8c06SSebastian Grimberg } 96*0c73c039SSebastian Grimberg } break; 97*0c73c039SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 9877d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 99*0c73c039SSebastian Grimberg CeedCheck(elem_size > 1, ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_CURL_ORIENTED should always have elem_size > 1"); 100fcbe8c06SSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 101fcbe8c06SSebastian Grimberg CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 102*0c73c039SSebastian Grimberg CeedInt n = 0; 103*0c73c039SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 104*0c73c039SSebastian Grimberg vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 105*0c73c039SSebastian Grimberg uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 106*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 107*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) + 108*0c73c039SSebastian Grimberg uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 109*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size] 110*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size])); 111*0c73c039SSebastian Grimberg } 112*0c73c039SSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 113*0c73c039SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 114*0c73c039SSebastian Grimberg vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 115*0c73c039SSebastian Grimberg uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 116*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] 117*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size])) + 118*0c73c039SSebastian Grimberg uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 119*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 120*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) + 121*0c73c039SSebastian Grimberg uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 122*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size] 123*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size])); 124*0c73c039SSebastian Grimberg } 125*0c73c039SSebastian Grimberg } 126*0c73c039SSebastian Grimberg CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 127*0c73c039SSebastian Grimberg vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 128*0c73c039SSebastian Grimberg uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 129*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] 130*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size])) + 131*0c73c039SSebastian Grimberg uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 132*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 133*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])); 1342b730f8bSJeremy L Thompson } 1352b730f8bSJeremy L Thompson } 1362b730f8bSJeremy L Thompson } 137*0c73c039SSebastian Grimberg } break; 138b435c5a6Srezgarshakeri } 139*0c73c039SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 140*0c73c039SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 141*0c73c039SSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 142*0c73c039SSebastian Grimberg return CEED_ERROR_SUCCESS; 143fcbe8c06SSebastian Grimberg } 144*0c73c039SSebastian Grimberg 145*0c73c039SSebastian Grimberg //------------------------------------------------------------------------------ 146*0c73c039SSebastian Grimberg // Core ElemRestriction Apply Transpose Code 147*0c73c039SSebastian Grimberg //------------------------------------------------------------------------------ 148*0c73c039SSebastian Grimberg static inline int CeedElemRestrictionApplyTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 149*0c73c039SSebastian Grimberg const CeedInt comp_stride, CeedInt start, CeedInt stop, bool use_orients, CeedVector u, 150*0c73c039SSebastian Grimberg CeedVector v, CeedRequest *request) { 151*0c73c039SSebastian Grimberg Ceed ceed; 152*0c73c039SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 153*0c73c039SSebastian Grimberg CeedElemRestriction_Ref *impl; 154*0c73c039SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 155*0c73c039SSebastian Grimberg const CeedScalar *uu; 156*0c73c039SSebastian Grimberg CeedScalar *vv; 157*0c73c039SSebastian Grimberg CeedInt num_elem, elem_size, v_offset; 158*0c73c039SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 159*0c73c039SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 160*0c73c039SSebastian Grimberg v_offset = start * blk_size * elem_size * num_comp; 161*0c73c039SSebastian Grimberg CeedRestrictionType rstr_type; 162*0c73c039SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 163*0c73c039SSebastian Grimberg 1647f90ec76Sjeremylt // Restriction from E-vector to L-vector 1658d94b059Sjeremylt // Performing v += r^T * u 1669475e044SSebastian Grimberg // uu has shape [elem_size, num_comp, num_elem], row-major 167fcbe8c06SSebastian Grimberg // vv has shape [nnodes, num_comp] 168*0c73c039SSebastian Grimberg // Sum into for transpose mode 169*0c73c039SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 170*0c73c039SSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 171fcbe8c06SSebastian Grimberg switch (rstr_type) { 172fcbe8c06SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 173d979a051Sjeremylt // No offsets provided, Identity Restriction 174d1d35e2fSjeremylt bool has_backend_strides; 1752b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 176d1d35e2fSjeremylt if (has_backend_strides) { 177d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1787f90ec76Sjeremylt // This if brach is left separate to allow better inlining 1792b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 1802b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1812b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 1822b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 1832b730f8bSJeremy L Thompson vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 1842b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 1852b730f8bSJeremy L Thompson } 1862b730f8bSJeremy L Thompson } 1872b730f8bSJeremy L Thompson } 1882b730f8bSJeremy L Thompson } 1897f90ec76Sjeremylt } else { 1907f90ec76Sjeremylt // User provided strides 1917f90ec76Sjeremylt CeedInt strides[3]; 1922b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 1932b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 1942b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1952b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 1962b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 1972b730f8bSJeremy L Thompson vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 1982b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 1992b730f8bSJeremy L Thompson } 2002b730f8bSJeremy L Thompson } 2012b730f8bSJeremy L Thompson } 2022b730f8bSJeremy L Thompson } 203523b8ea0Sjeremylt } 204fcbe8c06SSebastian Grimberg } break; 205*0c73c039SSebastian Grimberg case CEED_RESTRICTION_DEFAULT: { 206fcbe8c06SSebastian Grimberg // Default restriction with offsets 2072b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 2082b730f8bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 2092b730f8bSJeremy L Thompson for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 2108d94b059Sjeremylt // Iteration bound set to discard padding elements 2112b730f8bSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 212*0c73c039SSebastian Grimberg vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset]; 213fcbe8c06SSebastian Grimberg } 214fcbe8c06SSebastian Grimberg } 215fcbe8c06SSebastian Grimberg } 216fcbe8c06SSebastian Grimberg } 217*0c73c039SSebastian Grimberg } break; 218*0c73c039SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 219fcbe8c06SSebastian Grimberg // Restriction with orientations 220fcbe8c06SSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 221fcbe8c06SSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 222fcbe8c06SSebastian Grimberg for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 223fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 224fcbe8c06SSebastian Grimberg for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 225f30b1135SSebastian Grimberg vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 226*0c73c039SSebastian Grimberg uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset] * (use_orients && impl->orients[j + e * elem_size] ? -1.0 : 1.0); 227fcbe8c06SSebastian Grimberg } 228fcbe8c06SSebastian Grimberg } 229fcbe8c06SSebastian Grimberg } 230fcbe8c06SSebastian Grimberg } 231*0c73c039SSebastian Grimberg } break; 232*0c73c039SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 23377d1c127SSebastian Grimberg // Restriction with tridiagonal transformation 234*0c73c039SSebastian Grimberg CeedCheck(elem_size > 1, ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_CURL_ORIENTED should always have elem_size > 1"); 235fcbe8c06SSebastian Grimberg for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 236fcbe8c06SSebastian Grimberg for (CeedInt k = 0; k < num_comp; k++) { 237fcbe8c06SSebastian Grimberg // Iteration bound set to discard padding elements 238*0c73c039SSebastian Grimberg CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0; 239*0c73c039SSebastian Grimberg CeedScalar vvn[blk_size]; 240*0c73c039SSebastian Grimberg for (CeedInt j = 0; j < blk_end; j++) { 241*0c73c039SSebastian Grimberg vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 242*0c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 243*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 244*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) + 245*0c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 246*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size] 247*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size])); 248*0c73c039SSebastian Grimberg } 249*0c73c039SSebastian Grimberg for (n = 1; n < elem_size - 1; n++) { 250*0c73c039SSebastian Grimberg for (CeedInt j = 0; j < blk_end; j++) { 251*0c73c039SSebastian Grimberg vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 252*0c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 253*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] 254*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size])) + 255*0c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 256*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 257*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) + 258*0c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 259*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size] 260*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size])); 261*0c73c039SSebastian Grimberg } 262*0c73c039SSebastian Grimberg } 263*0c73c039SSebastian Grimberg for (CeedInt j = 0; j < blk_end; j++) { 264*0c73c039SSebastian Grimberg vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 265*0c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 266*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] 267*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size])) + 268*0c73c039SSebastian Grimberg uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 269*0c73c039SSebastian Grimberg (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 270*0c73c039SSebastian Grimberg : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])); 27121617c04Sjeremylt } 272b435c5a6Srezgarshakeri } 2732b730f8bSJeremy L Thompson } 274*0c73c039SSebastian Grimberg } break; 2752b730f8bSJeremy L Thompson } 2762b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 2772b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 2782b730f8bSJeremy L Thompson if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 279e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 28021617c04Sjeremylt } 28121617c04Sjeremylt 282f10650afSjeremylt //------------------------------------------------------------------------------ 283f10650afSjeremylt // ElemRestriction Apply - Common Sizes 284f10650afSjeremylt //------------------------------------------------------------------------------ 2852b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 28677d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 2873bdd4e5aSSebastian Grimberg CeedRequest *request) { 288*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 289*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orients, u, v, request); 290*0c73c039SSebastian Grimberg } else { 291*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orients, u, v, request); 292*0c73c039SSebastian Grimberg } 293d979a051Sjeremylt } 294d979a051Sjeremylt 2952b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 29677d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 2973bdd4e5aSSebastian Grimberg CeedRequest *request) { 298*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 299*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 1, 1, start, stop, use_orients, u, v, request); 300*0c73c039SSebastian Grimberg } else { 301*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 1, 1, start, stop, use_orients, u, v, request); 302*0c73c039SSebastian Grimberg } 3034d2a38eeSjeremylt } 3044d2a38eeSjeremylt 3052b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 30677d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 3073bdd4e5aSSebastian Grimberg CeedRequest *request) { 308*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 309*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orients, u, v, request); 310*0c73c039SSebastian Grimberg } else { 311*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orients, u, v, request); 312*0c73c039SSebastian Grimberg } 3139c36149bSjeremylt } 3149c36149bSjeremylt 3152b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 31677d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 3173bdd4e5aSSebastian Grimberg CeedRequest *request) { 318*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 319*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 8, 1, start, stop, use_orients, u, v, request); 320*0c73c039SSebastian Grimberg } else { 321*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 8, 1, start, stop, use_orients, u, v, request); 322*0c73c039SSebastian Grimberg } 3239c36149bSjeremylt } 3249c36149bSjeremylt 3252b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 32677d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 3273bdd4e5aSSebastian Grimberg CeedRequest *request) { 328*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 329*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orients, u, v, request); 330*0c73c039SSebastian Grimberg } else { 331*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orients, u, v, request); 332*0c73c039SSebastian Grimberg } 333d979a051Sjeremylt } 334d979a051Sjeremylt 3352b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 33677d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 3373bdd4e5aSSebastian Grimberg CeedRequest *request) { 338*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 339*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 1, 1, start, stop, use_orients, u, v, request); 340*0c73c039SSebastian Grimberg } else { 341*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 1, 1, start, stop, use_orients, u, v, request); 342*0c73c039SSebastian Grimberg } 343d979a051Sjeremylt } 344d979a051Sjeremylt 3452b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 34677d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 3473bdd4e5aSSebastian Grimberg CeedRequest *request) { 348*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 349*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orients, u, v, request); 350*0c73c039SSebastian Grimberg } else { 351*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orients, u, v, request); 352*0c73c039SSebastian Grimberg } 353d979a051Sjeremylt } 354d979a051Sjeremylt 3552b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 35677d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 3573bdd4e5aSSebastian Grimberg CeedRequest *request) { 358*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 359*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 8, 1, start, stop, use_orients, u, v, request); 360*0c73c039SSebastian Grimberg } else { 361*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 8, 1, start, stop, use_orients, u, v, request); 362*0c73c039SSebastian Grimberg } 363d979a051Sjeremylt } 364d979a051Sjeremylt 365bf4d1581Sjeremylt // LCOV_EXCL_START 3662b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 36777d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 3683bdd4e5aSSebastian Grimberg CeedRequest *request) { 369*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 370*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orients, u, v, request); 371*0c73c039SSebastian Grimberg } else { 372*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orients, u, v, request); 373*0c73c039SSebastian Grimberg } 374d979a051Sjeremylt } 375bf4d1581Sjeremylt // LCOV_EXCL_STOP 376d979a051Sjeremylt 3772b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 37877d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 3793bdd4e5aSSebastian Grimberg CeedRequest *request) { 380*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 381*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 1, 1, start, stop, use_orients, u, v, request); 382*0c73c039SSebastian Grimberg } else { 383*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 1, 1, start, stop, use_orients, u, v, request); 384*0c73c039SSebastian Grimberg } 385d979a051Sjeremylt } 386d979a051Sjeremylt 387bf4d1581Sjeremylt // LCOV_EXCL_START 3882b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 38977d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 3903bdd4e5aSSebastian Grimberg CeedRequest *request) { 391*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 392*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orients, u, v, request); 393*0c73c039SSebastian Grimberg } else { 394*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orients, u, v, request); 395*0c73c039SSebastian Grimberg } 396d979a051Sjeremylt } 397bf4d1581Sjeremylt // LCOV_EXCL_STOP 398d979a051Sjeremylt 3992b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 40077d1c127SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 4013bdd4e5aSSebastian Grimberg CeedRequest *request) { 402*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 403*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 8, 1, start, stop, use_orients, u, v, request); 404*0c73c039SSebastian Grimberg } else { 405*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 8, 1, start, stop, use_orients, u, v, request); 406*0c73c039SSebastian Grimberg } 407*0c73c039SSebastian Grimberg } 408*0c73c039SSebastian Grimberg 409*0c73c039SSebastian Grimberg static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 410*0c73c039SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, 411*0c73c039SSebastian Grimberg CeedVector v, CeedRequest *request) { 412*0c73c039SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 413*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, use_orients, u, v, request); 414*0c73c039SSebastian Grimberg } else { 415*0c73c039SSebastian Grimberg return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, use_orients, u, v, request); 416*0c73c039SSebastian Grimberg } 4174d2a38eeSjeremylt } 4184d2a38eeSjeremylt 419f10650afSjeremylt //------------------------------------------------------------------------------ 420f10650afSjeremylt // ElemRestriction Apply 421f10650afSjeremylt //------------------------------------------------------------------------------ 4222b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 423d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 4242b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 4252b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 4262b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 4272b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 4287509a596Sjeremylt CeedElemRestriction_Ref *impl; 4292b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 4304d2a38eeSjeremylt 431f30b1135SSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, true, t_mode, u, v, request); 432f30b1135SSebastian Grimberg } 433f30b1135SSebastian Grimberg 434f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 435f30b1135SSebastian Grimberg // ElemRestriction Apply Unsigned 436f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 437f30b1135SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 438f30b1135SSebastian Grimberg CeedInt num_blk, blk_size, num_comp, comp_stride; 439f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 440f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 441f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 442f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 443f30b1135SSebastian Grimberg CeedElemRestriction_Ref *impl; 444f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 445f30b1135SSebastian Grimberg 446f30b1135SSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, false, t_mode, u, v, request); 4479c36149bSjeremylt } 448be9261b7Sjeremylt 449f10650afSjeremylt //------------------------------------------------------------------------------ 450f10650afSjeremylt // ElemRestriction Apply Block 451f10650afSjeremylt //------------------------------------------------------------------------------ 4522b730f8bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 453074cb416Sjeremylt CeedRequest *request) { 454d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 4552b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 4562b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 4572b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 4587509a596Sjeremylt CeedElemRestriction_Ref *impl; 4592b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 4604d2a38eeSjeremylt 461f30b1135SSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, true, t_mode, u, v, request); 4629c36149bSjeremylt } 463be9261b7Sjeremylt 464f10650afSjeremylt //------------------------------------------------------------------------------ 465bd33150aSjeremylt // ElemRestriction Get Offsets 466bd33150aSjeremylt //------------------------------------------------------------------------------ 4672b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 468bd33150aSjeremylt CeedElemRestriction_Ref *impl; 4692b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 470bd33150aSjeremylt Ceed ceed; 4712b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 472bd33150aSjeremylt 4736574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 474bd33150aSjeremylt 475bd33150aSjeremylt *offsets = impl->offsets; 476e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 477bd33150aSjeremylt } 478bd33150aSjeremylt 479bd33150aSjeremylt //------------------------------------------------------------------------------ 48077d1c127SSebastian Grimberg // ElemRestriction Get Orientations 48177d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 48277d1c127SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 48377d1c127SSebastian Grimberg CeedElemRestriction_Ref *impl; 48477d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 48577d1c127SSebastian Grimberg Ceed ceed; 48677d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 48777d1c127SSebastian Grimberg 488fcbe8c06SSebastian Grimberg CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 48977d1c127SSebastian Grimberg 49077d1c127SSebastian Grimberg *orients = impl->orients; 49177d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 49277d1c127SSebastian Grimberg } 49377d1c127SSebastian Grimberg 49477d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 49577d1c127SSebastian Grimberg // ElemRestriction Get Curl-Conforming Orientations 49677d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 497*0c73c039SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 49877d1c127SSebastian Grimberg CeedElemRestriction_Ref *impl; 49977d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 50077d1c127SSebastian Grimberg Ceed ceed; 50177d1c127SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 50277d1c127SSebastian Grimberg 503fcbe8c06SSebastian Grimberg CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 50477d1c127SSebastian Grimberg 50577d1c127SSebastian Grimberg *curl_orients = impl->curl_orients; 50677d1c127SSebastian Grimberg return CEED_ERROR_SUCCESS; 50777d1c127SSebastian Grimberg } 50877d1c127SSebastian Grimberg 50977d1c127SSebastian Grimberg //------------------------------------------------------------------------------ 510f10650afSjeremylt // ElemRestriction Destroy 511f10650afSjeremylt //------------------------------------------------------------------------------ 51221617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 513fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 5142b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 51521617c04Sjeremylt 5162b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->offsets_allocated)); 51777d1c127SSebastian Grimberg CeedCallBackend(CeedFree(&impl->orients_allocated)); 51877d1c127SSebastian Grimberg CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 5192b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 520e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 52121617c04Sjeremylt } 52221617c04Sjeremylt 523f10650afSjeremylt //------------------------------------------------------------------------------ 524f10650afSjeremylt // ElemRestriction Create 525f10650afSjeremylt //------------------------------------------------------------------------------ 526fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 527*0c73c039SSebastian Grimberg const CeedInt8 *curl_orients, CeedElemRestriction r) { 52821617c04Sjeremylt CeedElemRestriction_Ref *impl; 529d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 5302b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 5312b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 5322b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 5332b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 5342b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 5352b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 5364ce2993fSjeremylt Ceed ceed; 5372b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 53821617c04Sjeremylt 5396574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 5402b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 5413661185eSjeremylt 54292fe105eSJeremy L Thompson // Offsets data 543fcbe8c06SSebastian Grimberg CeedRestrictionType rstr_type; 544fcbe8c06SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 545fcbe8c06SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 54692fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 547d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 548d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 549d1d35e2fSjeremylt curr_ceed = parent_ceed; 5502b730f8bSJeremy L Thompson CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 5513661185eSjeremylt } 5523661185eSjeremylt const char *resource; 5532b730f8bSJeremy L Thompson CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 5542b730f8bSJeremy L Thompson if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 555d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 556e79b91d9SJeremy L Thompson CeedSize l_size; 5572b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 5583661185eSjeremylt 5592b730f8bSJeremy L Thompson for (CeedInt i = 0; i < num_elem * elem_size; i++) { 5606574a04fSJeremy L Thompson CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 5616574a04fSJeremy L Thompson "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 5622b730f8bSJeremy L Thompson } 5632b730f8bSJeremy L Thompson } 5643661185eSjeremylt 56592fe105eSJeremy L Thompson // Copy data 566d1d35e2fSjeremylt switch (copy_mode) { 56721617c04Sjeremylt case CEED_COPY_VALUES: 5682b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 5692b730f8bSJeremy L Thompson memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 570d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 57121617c04Sjeremylt break; 57221617c04Sjeremylt case CEED_OWN_POINTER: 573d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 574d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 57521617c04Sjeremylt break; 57621617c04Sjeremylt case CEED_USE_POINTER: 577d979a051Sjeremylt impl->offsets = offsets; 57821617c04Sjeremylt } 579fcbe8c06SSebastian Grimberg 580fcbe8c06SSebastian Grimberg // Orientation data 581fcbe8c06SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 5820305e208SSebastian Grimberg CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 583fcbe8c06SSebastian Grimberg switch (copy_mode) { 584fcbe8c06SSebastian Grimberg case CEED_COPY_VALUES: 585fcbe8c06SSebastian Grimberg CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated)); 586fcbe8c06SSebastian Grimberg memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0])); 587fcbe8c06SSebastian Grimberg impl->orients = impl->orients_allocated; 588fcbe8c06SSebastian Grimberg break; 589fcbe8c06SSebastian Grimberg case CEED_OWN_POINTER: 590fcbe8c06SSebastian Grimberg impl->orients_allocated = (bool *)orients; 591fcbe8c06SSebastian Grimberg impl->orients = impl->orients_allocated; 592fcbe8c06SSebastian Grimberg break; 593fcbe8c06SSebastian Grimberg case CEED_USE_POINTER: 594fcbe8c06SSebastian Grimberg impl->orients = orients; 595fcbe8c06SSebastian Grimberg } 596fcbe8c06SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 5970305e208SSebastian Grimberg CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 598fcbe8c06SSebastian Grimberg switch (copy_mode) { 599fcbe8c06SSebastian Grimberg case CEED_COPY_VALUES: 600fcbe8c06SSebastian Grimberg CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated)); 601fcbe8c06SSebastian Grimberg memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0])); 602fcbe8c06SSebastian Grimberg impl->curl_orients = impl->curl_orients_allocated; 603fcbe8c06SSebastian Grimberg break; 604fcbe8c06SSebastian Grimberg case CEED_OWN_POINTER: 605*0c73c039SSebastian Grimberg impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 606fcbe8c06SSebastian Grimberg impl->curl_orients = impl->curl_orients_allocated; 607fcbe8c06SSebastian Grimberg break; 608fcbe8c06SSebastian Grimberg case CEED_USE_POINTER: 609fcbe8c06SSebastian Grimberg impl->curl_orients = curl_orients; 610fcbe8c06SSebastian Grimberg } 611fcbe8c06SSebastian Grimberg } 61292fe105eSJeremy L Thompson } 613fe2413ffSjeremylt 6142b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 615d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 6162b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 6172b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 618f30b1135SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 6192b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 6202b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 62177d1c127SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 62277d1c127SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 6232b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 624d979a051Sjeremylt 625d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 626d979a051Sjeremylt CeedInt idx = -1; 6272b730f8bSJeremy L Thompson if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 628d979a051Sjeremylt switch (idx) { 629d979a051Sjeremylt case 110: 630d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 631d979a051Sjeremylt break; 632d979a051Sjeremylt case 111: 633d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 634d979a051Sjeremylt break; 635d979a051Sjeremylt case 180: 636d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 637d979a051Sjeremylt break; 638d979a051Sjeremylt case 181: 639d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 640d979a051Sjeremylt break; 641d979a051Sjeremylt case 310: 642d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 643d979a051Sjeremylt break; 644d979a051Sjeremylt case 311: 645d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 646d979a051Sjeremylt break; 647d979a051Sjeremylt case 380: 648d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 649d979a051Sjeremylt break; 650d979a051Sjeremylt case 381: 651d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 652d979a051Sjeremylt break; 653bf4d1581Sjeremylt // LCOV_EXCL_START 654d979a051Sjeremylt case 510: 655d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 656d979a051Sjeremylt break; 657bf4d1581Sjeremylt // LCOV_EXCL_STOP 658d979a051Sjeremylt case 511: 659d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 660d979a051Sjeremylt break; 661bf4d1581Sjeremylt // LCOV_EXCL_START 662d979a051Sjeremylt case 580: 663d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 664d979a051Sjeremylt break; 665bf4d1581Sjeremylt // LCOV_EXCL_STOP 666d979a051Sjeremylt case 581: 667d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 668d979a051Sjeremylt break; 669d979a051Sjeremylt default: 670d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 671d979a051Sjeremylt break; 672d979a051Sjeremylt } 673d979a051Sjeremylt 674e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 67521617c04Sjeremylt } 676fc0567d9Srezgarshakeri 677fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 678