121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 421617c04Sjeremylt // 521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 721617c04Sjeremylt // element discretizations for exascale applications. For more information and 821617c04Sjeremylt // source code availability see http://github.com/ceed. 921617c04Sjeremylt // 1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early 1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1621617c04Sjeremylt 17ec3da8bcSJed Brown #include <ceed/ceed.h> 18ec3da8bcSJed Brown #include <ceed/backend.h> 193d576824SJeremy L Thompson #include <stdbool.h> 203d576824SJeremy L Thompson #include <string.h> 2121617c04Sjeremylt #include "ceed-ref.h" 2221617c04Sjeremylt 23f10650afSjeremylt //------------------------------------------------------------------------------ 24f10650afSjeremylt // Core ElemRestriction Apply Code 25f10650afSjeremylt //------------------------------------------------------------------------------ 26be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 27d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 28d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 299c36149bSjeremylt CeedVector v, CeedRequest *request) { 3021617c04Sjeremylt int ierr; 314ce2993fSjeremylt CeedElemRestriction_Ref *impl; 32e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 3321617c04Sjeremylt const CeedScalar *uu; 3421617c04Sjeremylt CeedScalar *vv; 35d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 36d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 37d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 38d1d35e2fSjeremylt v_offset = start*blk_size*elem_size*num_comp; 3921617c04Sjeremylt 40*b435c5a6Srezgarshakeri bool is_oriented; 41*b435c5a6Srezgarshakeri ierr = CeedElemRestrictionIsOriented(r, &is_oriented); CeedChkBackend(ierr); 42e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr); 439c774eddSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 449c774eddSJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 45e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 469c774eddSJeremy L Thompson } else { 479c774eddSJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 489c774eddSJeremy L Thompson ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 499c774eddSJeremy L Thompson } 507f90ec76Sjeremylt // Restriction from L-vector to E-vector 5121617c04Sjeremylt // Perform: v = r * u 52d1d35e2fSjeremylt if (t_mode == CEED_NOTRANSPOSE) { 53d979a051Sjeremylt // No offsets provided, Identity Restriction 54d979a051Sjeremylt if (!impl->offsets) { 55d1d35e2fSjeremylt bool has_backend_strides; 56d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 57e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 58d1d35e2fSjeremylt if (has_backend_strides) { 59d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 607f90ec76Sjeremylt // This if branch is left separate to allow better inlining 61d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 62e1b98f6eSjeremylt CeedPragmaSIMD 63d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 641d79ecccSjeremylt CeedPragmaSIMD 65d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 667509a596Sjeremylt CeedPragmaSIMD 67d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 68d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 69d1d35e2fSjeremylt = uu[n + k*elem_size + 70d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*elem_size*num_comp]; 717f90ec76Sjeremylt } else { 727f90ec76Sjeremylt // User provided strides 737f90ec76Sjeremylt CeedInt strides[3]; 74e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 75d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 767f90ec76Sjeremylt CeedPragmaSIMD 77d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 787f90ec76Sjeremylt CeedPragmaSIMD 79d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 807f90ec76Sjeremylt CeedPragmaSIMD 81d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 82d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 837509a596Sjeremylt = uu[n*strides[0] + k*strides[1] + 84d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*strides[2]]; 857509a596Sjeremylt } 8621617c04Sjeremylt } else { 87d979a051Sjeremylt // Offsets provided, standard or blocked restriction 88d1d35e2fSjeremylt // vv has shape [elem_size, num_comp, num_elem], row-major 89d1d35e2fSjeremylt // uu has shape [nnodes, num_comp] 90*b435c5a6Srezgarshakeri if (!is_oriented) { 91*b435c5a6Srezgarshakeri for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 92*b435c5a6Srezgarshakeri CeedPragmaSIMD 93*b435c5a6Srezgarshakeri for (CeedInt k = 0; k < num_comp; k++) 94*b435c5a6Srezgarshakeri CeedPragmaSIMD 95*b435c5a6Srezgarshakeri for (CeedInt i = 0; i < elem_size*blk_size; i++) 96*b435c5a6Srezgarshakeri vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] 97*b435c5a6Srezgarshakeri = uu[impl->offsets[i+elem_size*e] + k*comp_stride]; 98*b435c5a6Srezgarshakeri } else { 99d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 100e1b98f6eSjeremylt CeedPragmaSIMD 101d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 102e1b98f6eSjeremylt CeedPragmaSIMD 103d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i++) 104d1d35e2fSjeremylt vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] 105fc0567d9Srezgarshakeri = uu[impl->offsets[i+elem_size*e] + k*comp_stride] * 106fc0567d9Srezgarshakeri (impl->orient && impl->orient[i+elem_size*e] ? -1. : 1.); 10721617c04Sjeremylt } 108*b435c5a6Srezgarshakeri } 10921617c04Sjeremylt } else { 1107f90ec76Sjeremylt // Restriction from E-vector to L-vector 1118d94b059Sjeremylt // Performing v += r^T * u 112d979a051Sjeremylt // No offsets provided, Identity Restriction 113d979a051Sjeremylt if (!impl->offsets) { 114d1d35e2fSjeremylt bool has_backend_strides; 115d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 116e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 117d1d35e2fSjeremylt if (has_backend_strides) { 118d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1197f90ec76Sjeremylt // This if brach is left separate to allow better inlining 120d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 121523b8ea0Sjeremylt CeedPragmaSIMD 122d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1237509a596Sjeremylt CeedPragmaSIMD 124d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1257f90ec76Sjeremylt CeedPragmaSIMD 126d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 127d1d35e2fSjeremylt vv[n + k*elem_size + (e+j)*elem_size*num_comp] 128d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 1297f90ec76Sjeremylt } else { 1307f90ec76Sjeremylt // User provided strides 1317f90ec76Sjeremylt CeedInt strides[3]; 132e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 133d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 1347f90ec76Sjeremylt CeedPragmaSIMD 135d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1367f90ec76Sjeremylt CeedPragmaSIMD 137d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1387f90ec76Sjeremylt CeedPragmaSIMD 139d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 1407509a596Sjeremylt vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 141d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 142523b8ea0Sjeremylt } 14321617c04Sjeremylt } else { 144d979a051Sjeremylt // Offsets provided, standard or blocked restriction 145d1d35e2fSjeremylt // uu has shape [elem_size, num_comp, num_elem] 146d1d35e2fSjeremylt // vv has shape [nnodes, num_comp] 147*b435c5a6Srezgarshakeri if (!is_oriented) { 148*b435c5a6Srezgarshakeri for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 149*b435c5a6Srezgarshakeri for (CeedInt k = 0; k < num_comp; k++) 150*b435c5a6Srezgarshakeri for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) 151*b435c5a6Srezgarshakeri // Iteration bound set to discard padding elements 152*b435c5a6Srezgarshakeri for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) 153*b435c5a6Srezgarshakeri vv[impl->offsets[j+e*elem_size] + k*comp_stride] 154*b435c5a6Srezgarshakeri += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset]; 155*b435c5a6Srezgarshakeri } else { 156d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 157d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 158d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) 1598d94b059Sjeremylt // Iteration bound set to discard padding elements 160d1d35e2fSjeremylt for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) 161d1d35e2fSjeremylt vv[impl->offsets[j+e*elem_size] + k*comp_stride] 162fc0567d9Srezgarshakeri += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] * 163fc0567d9Srezgarshakeri (impl->orient && impl->orient[j+e*elem_size] ? -1. : 1.); 16421617c04Sjeremylt } 16521617c04Sjeremylt } 166*b435c5a6Srezgarshakeri } 167e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr); 168e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr); 16921617c04Sjeremylt if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 17021617c04Sjeremylt *request = NULL; 171e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 17221617c04Sjeremylt } 17321617c04Sjeremylt 174f10650afSjeremylt //------------------------------------------------------------------------------ 175f10650afSjeremylt // ElemRestriction Apply - Common Sizes 176f10650afSjeremylt //------------------------------------------------------------------------------ 177d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, 178d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 179d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 180d979a051Sjeremylt CeedVector v, CeedRequest *request) { 181d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, 182d1d35e2fSjeremylt t_mode, u, v, request); 183d979a051Sjeremylt } 184d979a051Sjeremylt 185d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, 186d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 187d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 188d979a051Sjeremylt CeedVector v, CeedRequest *request) { 189d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, 1909c36149bSjeremylt u, v, request); 1914d2a38eeSjeremylt } 1924d2a38eeSjeremylt 193d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, 194d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 195d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 196d979a051Sjeremylt CeedVector v, CeedRequest *request) { 197d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, 198d1d35e2fSjeremylt t_mode, u, v, request); 1999c36149bSjeremylt } 2009c36149bSjeremylt 201d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, 202d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 203d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 204d979a051Sjeremylt CeedVector v, CeedRequest *request) { 205d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, 2069c36149bSjeremylt u, v, request); 2079c36149bSjeremylt } 2089c36149bSjeremylt 209d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, 210d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 211d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 212d979a051Sjeremylt CeedVector v, CeedRequest *request) { 213d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, 214d1d35e2fSjeremylt t_mode, u, v, request); 215d979a051Sjeremylt } 216d979a051Sjeremylt 217d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, 218d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 219d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 220d979a051Sjeremylt CeedVector v, CeedRequest *request) { 221d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, 222d979a051Sjeremylt u, v, request); 223d979a051Sjeremylt } 224d979a051Sjeremylt 225d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, 226d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 227d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 228d979a051Sjeremylt CeedVector v, CeedRequest *request) { 229d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, 230d1d35e2fSjeremylt t_mode, u, v, request); 231d979a051Sjeremylt } 232d979a051Sjeremylt 233d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, 234d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 235d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 236d979a051Sjeremylt CeedVector v, CeedRequest *request) { 237d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, 238d979a051Sjeremylt u, v, request); 239d979a051Sjeremylt } 240d979a051Sjeremylt 241bf4d1581Sjeremylt // LCOV_EXCL_START 242d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, 243d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 244d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 245d979a051Sjeremylt CeedVector v, CeedRequest *request) { 246d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, 247d1d35e2fSjeremylt t_mode, u, v, request); 248d979a051Sjeremylt } 249bf4d1581Sjeremylt // LCOV_EXCL_STOP 250d979a051Sjeremylt 251d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, 252d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 253d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 254d979a051Sjeremylt CeedVector v, CeedRequest *request) { 255d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, 256d979a051Sjeremylt u, v, request); 257d979a051Sjeremylt } 258d979a051Sjeremylt 259bf4d1581Sjeremylt // LCOV_EXCL_START 260d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, 261d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 262d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 263d979a051Sjeremylt CeedVector v, CeedRequest *request) { 264d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, 265d1d35e2fSjeremylt t_mode, u, v, request); 266d979a051Sjeremylt } 267bf4d1581Sjeremylt // LCOV_EXCL_STOP 268d979a051Sjeremylt 269d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, 270d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 271d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 272d979a051Sjeremylt CeedVector v, CeedRequest *request) { 273d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, 2749c36149bSjeremylt u, v, request); 2754d2a38eeSjeremylt } 2764d2a38eeSjeremylt 277f10650afSjeremylt //------------------------------------------------------------------------------ 278f10650afSjeremylt // ElemRestriction Apply 279f10650afSjeremylt //------------------------------------------------------------------------------ 280be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 281d1d35e2fSjeremylt CeedTransposeMode t_mode, CeedVector u, 282be9261b7Sjeremylt CeedVector v, CeedRequest *request) { 283be9261b7Sjeremylt int ierr; 284d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 285d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 286d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 287d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 288d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 2897509a596Sjeremylt CeedElemRestriction_Ref *impl; 290e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2914d2a38eeSjeremylt 292d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, 293d979a051Sjeremylt request); 2949c36149bSjeremylt } 295be9261b7Sjeremylt 296f10650afSjeremylt //------------------------------------------------------------------------------ 297f10650afSjeremylt // ElemRestriction Apply Block 298f10650afSjeremylt //------------------------------------------------------------------------------ 299be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 300d1d35e2fSjeremylt CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 301074cb416Sjeremylt CeedRequest *request) { 3024d2a38eeSjeremylt int ierr; 303d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 304d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 305d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 306d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 3077509a596Sjeremylt CeedElemRestriction_Ref *impl; 308e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 3094d2a38eeSjeremylt 310d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode, 311d1d35e2fSjeremylt u, v, request); 3129c36149bSjeremylt } 313be9261b7Sjeremylt 314f10650afSjeremylt //------------------------------------------------------------------------------ 315bd33150aSjeremylt // ElemRestriction Get Offsets 316bd33150aSjeremylt //------------------------------------------------------------------------------ 317bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, 318d1d35e2fSjeremylt CeedMemType mem_type, const CeedInt **offsets) { 319bd33150aSjeremylt int ierr; 320bd33150aSjeremylt CeedElemRestriction_Ref *impl; 321e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr); 322bd33150aSjeremylt Ceed ceed; 323e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr); 324bd33150aSjeremylt 325d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 326bd33150aSjeremylt // LCOV_EXCL_START 327e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 328bd33150aSjeremylt // LCOV_EXCL_STOP 329bd33150aSjeremylt 330bd33150aSjeremylt *offsets = impl->offsets; 331e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 332bd33150aSjeremylt } 333bd33150aSjeremylt 334bd33150aSjeremylt //------------------------------------------------------------------------------ 335f10650afSjeremylt // ElemRestriction Destroy 336f10650afSjeremylt //------------------------------------------------------------------------------ 33721617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 33821617c04Sjeremylt int ierr; 339fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 340e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 34121617c04Sjeremylt 342e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr); 343e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 344e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 34521617c04Sjeremylt } 34621617c04Sjeremylt 347f10650afSjeremylt //------------------------------------------------------------------------------ 348f10650afSjeremylt // ElemRestriction Create 349f10650afSjeremylt //------------------------------------------------------------------------------ 350d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, 351d979a051Sjeremylt const CeedInt *offsets, 352d979a051Sjeremylt CeedElemRestriction r) { 35321617c04Sjeremylt int ierr; 35421617c04Sjeremylt CeedElemRestriction_Ref *impl; 355d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 356d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 357d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 358d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 359d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 360d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 361d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 3624ce2993fSjeremylt Ceed ceed; 363e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 36421617c04Sjeremylt 365d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 366c042f62fSJeremy L Thompson // LCOV_EXCL_START 367e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 368c042f62fSJeremy L Thompson // LCOV_EXCL_STOP 369e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 3703661185eSjeremylt 37192fe105eSJeremy L Thompson // Offsets data 372d1d35e2fSjeremylt bool is_strided; 373d1d35e2fSjeremylt ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr); 374d1d35e2fSjeremylt if (!is_strided) { 37592fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 376d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 377d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 378d1d35e2fSjeremylt curr_ceed = parent_ceed; 379d1d35e2fSjeremylt ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr); 3803661185eSjeremylt } 3813661185eSjeremylt const char *resource; 382d1d35e2fSjeremylt ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr); 383d1d35e2fSjeremylt if (!strcmp(resource, "/cpu/self/ref/serial") || 384d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/ref/blocked") || 385d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/serial") || 386d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 387d1d35e2fSjeremylt CeedInt l_size; 388d1d35e2fSjeremylt ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 3893661185eSjeremylt 390d1d35e2fSjeremylt for (CeedInt i = 0; i < num_elem*elem_size; i++) 391d1d35e2fSjeremylt if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) 3923661185eSjeremylt // LCOV_EXCL_START 393e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 394e15f9bd0SJeremy L Thompson "Restriction offset %d (%d) out of range " 395d1d35e2fSjeremylt "[0, %d]", i, offsets[i], l_size); 3963661185eSjeremylt // LCOV_EXCL_STOP 3973661185eSjeremylt } 3983661185eSjeremylt 39992fe105eSJeremy L Thompson // Copy data 400d1d35e2fSjeremylt switch (copy_mode) { 40121617c04Sjeremylt case CEED_COPY_VALUES: 402d1d35e2fSjeremylt ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated); 403e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 404d979a051Sjeremylt memcpy(impl->offsets_allocated, offsets, 405d1d35e2fSjeremylt num_elem * elem_size * sizeof(offsets[0])); 406d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 40721617c04Sjeremylt break; 40821617c04Sjeremylt case CEED_OWN_POINTER: 409d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 410d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 41121617c04Sjeremylt break; 41221617c04Sjeremylt case CEED_USE_POINTER: 413d979a051Sjeremylt impl->offsets = offsets; 41421617c04Sjeremylt } 41592fe105eSJeremy L Thompson } 416fe2413ffSjeremylt 417e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr); 418d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size*num_comp}; 419e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr); 420fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 421e15f9bd0SJeremy L Thompson CeedElemRestrictionApply_Ref); CeedChkBackend(ierr); 422be9261b7Sjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 423be9261b7Sjeremylt CeedElemRestrictionApplyBlock_Ref); 424e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 425bd33150aSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", 426bd33150aSjeremylt CeedElemRestrictionGetOffsets_Ref); 427e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 428fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 429e15f9bd0SJeremy L Thompson CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr); 430d979a051Sjeremylt 431d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 432d979a051Sjeremylt CeedInt idx = -1; 433d1d35e2fSjeremylt if (blk_size < 10) 434d1d35e2fSjeremylt idx = 100*num_comp + 10*blk_size + (comp_stride == 1); 435d979a051Sjeremylt switch (idx) { 436d979a051Sjeremylt case 110: 437d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 438d979a051Sjeremylt break; 439d979a051Sjeremylt case 111: 440d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 441d979a051Sjeremylt break; 442d979a051Sjeremylt case 180: 443d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 444d979a051Sjeremylt break; 445d979a051Sjeremylt case 181: 446d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 447d979a051Sjeremylt break; 448d979a051Sjeremylt case 310: 449d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 450d979a051Sjeremylt break; 451d979a051Sjeremylt case 311: 452d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 453d979a051Sjeremylt break; 454d979a051Sjeremylt case 380: 455d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 456d979a051Sjeremylt break; 457d979a051Sjeremylt case 381: 458d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 459d979a051Sjeremylt break; 460bf4d1581Sjeremylt // LCOV_EXCL_START 461d979a051Sjeremylt case 510: 462d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 463d979a051Sjeremylt break; 464bf4d1581Sjeremylt // LCOV_EXCL_STOP 465d979a051Sjeremylt case 511: 466d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 467d979a051Sjeremylt break; 468bf4d1581Sjeremylt // LCOV_EXCL_START 469d979a051Sjeremylt case 580: 470d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 471d979a051Sjeremylt break; 472bf4d1581Sjeremylt // LCOV_EXCL_STOP 473d979a051Sjeremylt case 581: 474d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 475d979a051Sjeremylt break; 476d979a051Sjeremylt default: 477d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 478d979a051Sjeremylt break; 479d979a051Sjeremylt } 480d979a051Sjeremylt 481e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 48221617c04Sjeremylt } 483fc0567d9Srezgarshakeri 484fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 485fc0567d9Srezgarshakeri // ElemRestriction Create Oriented 486fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 487fc0567d9Srezgarshakeri int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, 488fc0567d9Srezgarshakeri CeedCopyMode copy_mode, 489fc0567d9Srezgarshakeri const CeedInt *offsets, const bool *orient, 490fc0567d9Srezgarshakeri CeedElemRestriction r) { 491fc0567d9Srezgarshakeri int ierr; 492fc0567d9Srezgarshakeri CeedElemRestriction_Ref *impl; 493fc0567d9Srezgarshakeri CeedInt num_elem, elem_size; 494fc0567d9Srezgarshakeri // Set up for normal restriction with explicit offsets. This sets up dispatch to 495fc0567d9Srezgarshakeri // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation. 496fc0567d9Srezgarshakeri ierr = CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r); 497fc0567d9Srezgarshakeri CeedChkBackend(ierr); 498fc0567d9Srezgarshakeri 499fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 500fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 501fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 502fc0567d9Srezgarshakeri switch (copy_mode) { 503fc0567d9Srezgarshakeri case CEED_COPY_VALUES: 504fc0567d9Srezgarshakeri ierr = CeedMalloc(num_elem * elem_size, &impl->orient_allocated); 505fc0567d9Srezgarshakeri CeedChkBackend(ierr); 506fc0567d9Srezgarshakeri memcpy(impl->orient_allocated, orient, 507fc0567d9Srezgarshakeri num_elem * elem_size * sizeof(orient[0])); 508fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 509fc0567d9Srezgarshakeri break; 510fc0567d9Srezgarshakeri case CEED_OWN_POINTER: 511fc0567d9Srezgarshakeri impl->orient_allocated = (bool *)orient; 512fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 513fc0567d9Srezgarshakeri break; 514fc0567d9Srezgarshakeri case CEED_USE_POINTER: 515fc0567d9Srezgarshakeri impl->orient = orient; 516fc0567d9Srezgarshakeri } 517fc0567d9Srezgarshakeri return CEED_ERROR_SUCCESS; 518fc0567d9Srezgarshakeri } 519f10650afSjeremylt //------------------------------------------------------------------------------ 520