1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include "ceed-ref.h" 15 16 //------------------------------------------------------------------------------ 17 // Core ElemRestriction Apply Code 18 //------------------------------------------------------------------------------ 19 static inline int CeedElemRestrictionApplyNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 20 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, CeedVector v, 21 CeedRequest *request) { 22 CeedElemRestriction_Ref *impl; 23 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 24 const CeedScalar *uu; 25 CeedScalar *vv; 26 CeedInt num_elem, elem_size, v_offset; 27 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 28 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 29 v_offset = start * blk_size * elem_size * num_comp; 30 CeedRestrictionType rstr_type; 31 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 32 33 // Restriction from L-vector to E-vector 34 // Perform: v = r * u 35 // vv has shape [elem_size, num_comp, num_elem], row-major 36 // uu has shape [nnodes, num_comp] 37 // Overwrite for notranspose mode 38 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 39 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 40 switch (rstr_type) { 41 case CEED_RESTRICTION_STRIDED: { 42 // No offsets provided, Identity Restriction 43 bool has_backend_strides; 44 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 45 if (has_backend_strides) { 46 // CPU backend strides are {1, elem_size, elem_size*num_comp} 47 // This if branch is left separate to allow better inlining 48 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 49 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 50 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 51 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 52 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 53 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 54 } 55 } 56 } 57 } 58 } else { 59 // User provided strides 60 CeedInt strides[3]; 61 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 62 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 63 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 64 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 65 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 66 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 67 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 68 } 69 } 70 } 71 } 72 } 73 } break; 74 case CEED_RESTRICTION_DEFAULT: { 75 // Default restriction with offsets 76 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 77 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 78 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 79 vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 80 } 81 } 82 } 83 } break; 84 case CEED_RESTRICTION_ORIENTED: { 85 // Restriction with orientations 86 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 87 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 88 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 89 vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = 90 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 91 } 92 } 93 } 94 } break; 95 case CEED_RESTRICTION_CURL_ORIENTED: { 96 // Restriction with tridiagonal transformation 97 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 98 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 99 CeedInt n = 0; 100 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 101 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 102 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 103 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 104 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 105 impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]; 106 } 107 for (n = 1; n < elem_size - 1; n++) { 108 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 109 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 110 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 111 impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] + 112 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 113 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 114 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 115 impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]; 116 } 117 } 118 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 119 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 120 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 121 impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] + 122 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 123 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]; 124 } 125 } 126 } 127 } break; 128 } 129 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 130 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 131 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 132 return CEED_ERROR_SUCCESS; 133 } 134 135 //------------------------------------------------------------------------------ 136 // Core Unsigned ElemRestriction Apply Code 137 //------------------------------------------------------------------------------ 138 static inline int CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 139 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, 140 CeedVector v, CeedRequest *request) { 141 Ceed ceed; 142 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 143 CeedElemRestriction_Ref *impl; 144 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 145 const CeedScalar *uu; 146 CeedScalar *vv; 147 CeedInt num_elem, elem_size, v_offset; 148 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 149 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 150 v_offset = start * blk_size * elem_size * num_comp; 151 CeedRestrictionType rstr_type; 152 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 153 154 // Restriction from L-vector to E-vector 155 // Perform: v = r * u 156 // vv has shape [elem_size, num_comp, num_elem], row-major 157 // uu has shape [nnodes, num_comp] 158 // Overwrite for notranspose mode 159 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 160 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 161 switch (rstr_type) { 162 case CEED_RESTRICTION_ORIENTED: { 163 // Restriction with orientations 164 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 165 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 166 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 167 vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 168 } 169 } 170 } 171 } break; 172 case CEED_RESTRICTION_CURL_ORIENTED: { 173 // Restriction with tridiagonal transformation 174 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 175 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 176 CeedInt n = 0; 177 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 178 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 179 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 180 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 181 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 182 abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]); 183 } 184 for (n = 1; n < elem_size - 1; n++) { 185 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 186 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 187 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 188 abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) + 189 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 190 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 191 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 192 abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]); 193 } 194 } 195 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 196 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 197 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 198 abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) + 199 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 200 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]); 201 } 202 } 203 } 204 } break; 205 // LCOV_EXCL_START 206 case CEED_RESTRICTION_STRIDED: 207 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported"); 208 case CEED_RESTRICTION_DEFAULT: 209 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported"); 210 // LCOV_EXCL_STOP 211 } 212 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 213 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 214 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 215 return CEED_ERROR_SUCCESS; 216 } 217 218 //------------------------------------------------------------------------------ 219 // Core Unoriented ElemRestriction Apply Code 220 //------------------------------------------------------------------------------ 221 static inline int CeedElemRestrictionApplyUnorientedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 222 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, 223 CeedVector v, CeedRequest *request) { 224 Ceed ceed; 225 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 226 CeedElemRestriction_Ref *impl; 227 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 228 const CeedScalar *uu; 229 CeedScalar *vv; 230 CeedInt num_elem, elem_size, v_offset; 231 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 232 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 233 v_offset = start * blk_size * elem_size * num_comp; 234 CeedRestrictionType rstr_type; 235 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 236 237 // Restriction from L-vector to E-vector 238 // Perform: v = r * u 239 // vv has shape [elem_size, num_comp, num_elem], row-major 240 // uu has shape [nnodes, num_comp] 241 // Overwrite for notranspose mode 242 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 243 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 244 switch (rstr_type) { 245 case CEED_RESTRICTION_CURL_ORIENTED: { 246 // Restriction with tridiagonal transformation 247 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 248 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 249 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 250 vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 251 } 252 } 253 } 254 } break; 255 // LCOV_EXCL_START 256 case CEED_RESTRICTION_STRIDED: 257 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported"); 258 case CEED_RESTRICTION_DEFAULT: 259 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported"); 260 case CEED_RESTRICTION_ORIENTED: 261 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_ORIENTED not supported"); 262 // LCOV_EXCL_STOP 263 } 264 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 265 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 266 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 267 return CEED_ERROR_SUCCESS; 268 } 269 270 //------------------------------------------------------------------------------ 271 // Core ElemRestriction Apply Transpose Code 272 //------------------------------------------------------------------------------ 273 static inline int CeedElemRestrictionApplyTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 274 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, CeedVector v, 275 CeedRequest *request) { 276 CeedElemRestriction_Ref *impl; 277 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 278 const CeedScalar *uu; 279 CeedScalar *vv; 280 CeedInt num_elem, elem_size, v_offset; 281 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 282 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 283 v_offset = start * blk_size * elem_size * num_comp; 284 CeedRestrictionType rstr_type; 285 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 286 287 // Restriction from E-vector to L-vector 288 // Performing v += r^T * u 289 // uu has shape [elem_size, num_comp, num_elem], row-major 290 // vv has shape [nnodes, num_comp] 291 // Sum into for transpose mode 292 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 293 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 294 switch (rstr_type) { 295 case CEED_RESTRICTION_STRIDED: { 296 // No offsets provided, Identity Restriction 297 bool has_backend_strides; 298 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 299 if (has_backend_strides) { 300 // CPU backend strides are {1, elem_size, elem_size*num_comp} 301 // This if brach is left separate to allow better inlining 302 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 303 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 304 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 305 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 306 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 307 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 308 } 309 } 310 } 311 } 312 } else { 313 // User provided strides 314 CeedInt strides[3]; 315 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 316 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 317 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 318 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 319 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 320 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 321 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 322 } 323 } 324 } 325 } 326 } 327 } break; 328 case CEED_RESTRICTION_DEFAULT: { 329 // Default restriction with offsets 330 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 331 for (CeedInt k = 0; k < num_comp; k++) { 332 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 333 // Iteration bound set to discard padding elements 334 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 335 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset]; 336 } 337 } 338 } 339 } 340 } break; 341 case CEED_RESTRICTION_ORIENTED: { 342 // Restriction with orientations 343 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 344 for (CeedInt k = 0; k < num_comp; k++) { 345 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 346 // Iteration bound set to discard padding elements 347 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 348 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 349 uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 350 } 351 } 352 } 353 } 354 } break; 355 case CEED_RESTRICTION_CURL_ORIENTED: { 356 // Restriction with tridiagonal transformation 357 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 358 for (CeedInt k = 0; k < num_comp; k++) { 359 // Iteration bound set to discard padding elements 360 CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0; 361 for (CeedInt j = 0; j < blk_end; j++) { 362 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 363 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 364 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 365 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 366 impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]; 367 } 368 for (n = 1; n < elem_size - 1; n++) { 369 for (CeedInt j = 0; j < blk_end; j++) { 370 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 371 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 372 impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] + 373 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 374 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 375 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 376 impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]; 377 } 378 } 379 for (CeedInt j = 0; j < blk_end; j++) { 380 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 381 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 382 impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] + 383 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 384 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]; 385 } 386 } 387 } 388 } break; 389 } 390 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 391 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 392 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 393 return CEED_ERROR_SUCCESS; 394 } 395 396 //------------------------------------------------------------------------------ 397 // Core Unsigned ElemRestriction Apply Transpose Code 398 //------------------------------------------------------------------------------ 399 static inline int CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 400 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, 401 CeedVector v, CeedRequest *request) { 402 Ceed ceed; 403 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 404 CeedElemRestriction_Ref *impl; 405 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 406 const CeedScalar *uu; 407 CeedScalar *vv; 408 CeedInt num_elem, elem_size, v_offset; 409 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 410 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 411 v_offset = start * blk_size * elem_size * num_comp; 412 CeedRestrictionType rstr_type; 413 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 414 415 // Restriction from E-vector to L-vector 416 // Performing v += r^T * u 417 // uu has shape [elem_size, num_comp, num_elem], row-major 418 // vv has shape [nnodes, num_comp] 419 // Sum into for transpose mode 420 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 421 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 422 switch (rstr_type) { 423 case CEED_RESTRICTION_ORIENTED: { 424 // Restriction with orientations 425 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 426 for (CeedInt k = 0; k < num_comp; k++) { 427 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 428 // Iteration bound set to discard padding elements 429 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 430 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset]; 431 } 432 } 433 } 434 } 435 } break; 436 case CEED_RESTRICTION_CURL_ORIENTED: { 437 // Restriction with tridiagonal transformation 438 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 439 for (CeedInt k = 0; k < num_comp; k++) { 440 // Iteration bound set to discard padding elements 441 CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0; 442 for (CeedInt j = 0; j < blk_end; j++) { 443 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 444 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 445 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 446 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 447 abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]); 448 } 449 for (n = 1; n < elem_size - 1; n++) { 450 for (CeedInt j = 0; j < blk_end; j++) { 451 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 452 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 453 abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) + 454 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 455 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 456 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 457 abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]); 458 } 459 } 460 for (CeedInt j = 0; j < blk_end; j++) { 461 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 462 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 463 abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) + 464 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 465 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]); 466 } 467 } 468 } 469 } break; 470 // LCOV_EXCL_START 471 case CEED_RESTRICTION_STRIDED: 472 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported"); 473 case CEED_RESTRICTION_DEFAULT: 474 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported"); 475 // LCOV_EXCL_STOP 476 } 477 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 478 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 479 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 480 return CEED_ERROR_SUCCESS; 481 } 482 483 //------------------------------------------------------------------------------ 484 // Core Unoriented ElemRestriction Apply Transpose Code 485 //------------------------------------------------------------------------------ 486 static inline int CeedElemRestrictionApplyUnorientedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 487 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedVector u, 488 CeedVector v, CeedRequest *request) { 489 Ceed ceed; 490 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 491 CeedElemRestriction_Ref *impl; 492 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 493 const CeedScalar *uu; 494 CeedScalar *vv; 495 CeedInt num_elem, elem_size, v_offset; 496 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 497 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 498 v_offset = start * blk_size * elem_size * num_comp; 499 CeedRestrictionType rstr_type; 500 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 501 502 // Restriction from E-vector to L-vector 503 // Performing v += r^T * u 504 // uu has shape [elem_size, num_comp, num_elem], row-major 505 // vv has shape [nnodes, num_comp] 506 // Sum into for transpose mode 507 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 508 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 509 switch (rstr_type) { 510 case CEED_RESTRICTION_CURL_ORIENTED: { 511 // Restriction with orientations 512 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 513 for (CeedInt k = 0; k < num_comp; k++) { 514 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 515 // Iteration bound set to discard padding elements 516 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 517 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset]; 518 } 519 } 520 } 521 } 522 } break; 523 // LCOV_EXCL_START 524 case CEED_RESTRICTION_STRIDED: 525 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_STRIDED not supported"); 526 case CEED_RESTRICTION_DEFAULT: 527 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_DEFAULT not supported"); 528 case CEED_RESTRICTION_ORIENTED: 529 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_ORIENTED not supported"); 530 // LCOV_EXCL_STOP 531 } 532 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 533 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 534 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 535 return CEED_ERROR_SUCCESS; 536 } 537 538 //------------------------------------------------------------------------------ 539 // ElemRestriction Apply - Common Sizes 540 //------------------------------------------------------------------------------ 541 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 542 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 543 CeedVector u, CeedVector v, CeedRequest *request) { 544 CeedRestrictionType rstr_type; 545 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 546 if (t_mode == CEED_TRANSPOSE) { 547 switch (rstr_type) { 548 case CEED_RESTRICTION_STRIDED: 549 case CEED_RESTRICTION_DEFAULT: 550 return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 551 case CEED_RESTRICTION_ORIENTED: 552 if (use_signs) { 553 return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 554 } else { 555 return CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 556 } 557 case CEED_RESTRICTION_CURL_ORIENTED: 558 if (use_signs && use_orients) { 559 return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 560 } else if (use_orients) { 561 return CeedElemRestrictionApplyUnsignedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 562 } else { 563 return CeedElemRestrictionApplyUnorientedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 564 } 565 } 566 } else { 567 switch (rstr_type) { 568 case CEED_RESTRICTION_STRIDED: 569 case CEED_RESTRICTION_DEFAULT: 570 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 571 case CEED_RESTRICTION_ORIENTED: 572 if (use_signs) { 573 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 574 } else { 575 return CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 576 } 577 case CEED_RESTRICTION_CURL_ORIENTED: 578 if (use_signs && use_orients) { 579 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 580 } else if (use_orients) { 581 return CeedElemRestrictionApplyUnsignedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 582 } else { 583 return CeedElemRestrictionApplyUnorientedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, u, v, request); 584 } 585 } 586 } 587 } 588 589 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 590 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 591 CeedVector v, CeedRequest *request) { 592 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 593 } 594 595 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 596 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 597 CeedVector v, CeedRequest *request) { 598 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 599 } 600 601 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 602 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 603 CeedVector v, CeedRequest *request) { 604 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 605 } 606 607 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 608 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 609 CeedVector v, CeedRequest *request) { 610 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 611 } 612 613 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 614 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 615 CeedVector v, CeedRequest *request) { 616 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 617 } 618 619 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 620 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 621 CeedVector v, CeedRequest *request) { 622 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 623 } 624 625 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 626 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 627 CeedVector v, CeedRequest *request) { 628 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 629 } 630 631 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 632 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 633 CeedVector v, CeedRequest *request) { 634 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 635 } 636 637 // LCOV_EXCL_START 638 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 639 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 640 CeedVector v, CeedRequest *request) { 641 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 642 } 643 // LCOV_EXCL_STOP 644 645 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 646 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 647 CeedVector v, CeedRequest *request) { 648 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 649 } 650 651 // LCOV_EXCL_START 652 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 653 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 654 CeedVector v, CeedRequest *request) { 655 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 656 } 657 // LCOV_EXCL_STOP 658 659 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 660 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 661 CeedVector v, CeedRequest *request) { 662 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 663 } 664 665 //------------------------------------------------------------------------------ 666 // ElemRestriction Apply 667 //------------------------------------------------------------------------------ 668 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 669 CeedInt num_blk, blk_size, num_comp, comp_stride; 670 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 671 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 672 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 673 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 674 CeedElemRestriction_Ref *impl; 675 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 676 677 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, true, true, u, v, request); 678 } 679 680 //------------------------------------------------------------------------------ 681 // ElemRestriction Apply Unsigned 682 //------------------------------------------------------------------------------ 683 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 684 CeedInt num_blk, blk_size, num_comp, comp_stride; 685 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 686 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 687 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 688 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 689 CeedElemRestriction_Ref *impl; 690 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 691 692 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, true, u, v, request); 693 } 694 695 //------------------------------------------------------------------------------ 696 // ElemRestriction Apply Unoriented 697 //------------------------------------------------------------------------------ 698 static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 699 CeedInt num_blk, blk_size, num_comp, comp_stride; 700 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 701 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 702 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 703 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 704 CeedElemRestriction_Ref *impl; 705 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 706 707 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, false, u, v, request); 708 } 709 710 //------------------------------------------------------------------------------ 711 // ElemRestriction Apply Block 712 //------------------------------------------------------------------------------ 713 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 714 CeedRequest *request) { 715 CeedInt blk_size, num_comp, comp_stride; 716 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 717 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 718 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 719 CeedElemRestriction_Ref *impl; 720 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 721 722 return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request); 723 } 724 725 //------------------------------------------------------------------------------ 726 // ElemRestriction Get Offsets 727 //------------------------------------------------------------------------------ 728 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 729 CeedElemRestriction_Ref *impl; 730 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 731 Ceed ceed; 732 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 733 734 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 735 736 *offsets = impl->offsets; 737 return CEED_ERROR_SUCCESS; 738 } 739 740 //------------------------------------------------------------------------------ 741 // ElemRestriction Get Orientations 742 //------------------------------------------------------------------------------ 743 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 744 CeedElemRestriction_Ref *impl; 745 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 746 Ceed ceed; 747 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 748 749 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 750 751 *orients = impl->orients; 752 return CEED_ERROR_SUCCESS; 753 } 754 755 //------------------------------------------------------------------------------ 756 // ElemRestriction Get Curl-Conforming Orientations 757 //------------------------------------------------------------------------------ 758 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 759 CeedElemRestriction_Ref *impl; 760 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 761 Ceed ceed; 762 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 763 764 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 765 766 *curl_orients = impl->curl_orients; 767 return CEED_ERROR_SUCCESS; 768 } 769 770 //------------------------------------------------------------------------------ 771 // ElemRestriction Destroy 772 //------------------------------------------------------------------------------ 773 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 774 CeedElemRestriction_Ref *impl; 775 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 776 777 CeedCallBackend(CeedFree(&impl->offsets_allocated)); 778 CeedCallBackend(CeedFree(&impl->orients_allocated)); 779 CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 780 CeedCallBackend(CeedFree(&impl)); 781 return CEED_ERROR_SUCCESS; 782 } 783 784 //------------------------------------------------------------------------------ 785 // ElemRestriction Create 786 //------------------------------------------------------------------------------ 787 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 788 const CeedInt8 *curl_orients, CeedElemRestriction r) { 789 CeedElemRestriction_Ref *impl; 790 CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 791 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 792 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 793 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 794 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 795 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 796 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 797 Ceed ceed; 798 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 799 800 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 801 CeedCallBackend(CeedCalloc(1, &impl)); 802 803 // Offsets data 804 CeedRestrictionType rstr_type; 805 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 806 if (rstr_type != CEED_RESTRICTION_STRIDED) { 807 // Check indices for ref or memcheck backends 808 Ceed parent_ceed = ceed, curr_ceed = NULL; 809 while (parent_ceed != curr_ceed) { 810 curr_ceed = parent_ceed; 811 CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 812 } 813 const char *resource; 814 CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 815 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 816 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 817 CeedSize l_size; 818 CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 819 820 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 821 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 822 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 823 } 824 } 825 826 // Copy data 827 switch (copy_mode) { 828 case CEED_COPY_VALUES: 829 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 830 memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 831 impl->offsets = impl->offsets_allocated; 832 break; 833 case CEED_OWN_POINTER: 834 impl->offsets_allocated = (CeedInt *)offsets; 835 impl->offsets = impl->offsets_allocated; 836 break; 837 case CEED_USE_POINTER: 838 impl->offsets = offsets; 839 } 840 841 // Orientation data 842 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 843 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 844 switch (copy_mode) { 845 case CEED_COPY_VALUES: 846 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated)); 847 memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0])); 848 impl->orients = impl->orients_allocated; 849 break; 850 case CEED_OWN_POINTER: 851 impl->orients_allocated = (bool *)orients; 852 impl->orients = impl->orients_allocated; 853 break; 854 case CEED_USE_POINTER: 855 impl->orients = orients; 856 } 857 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 858 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 859 switch (copy_mode) { 860 case CEED_COPY_VALUES: 861 CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated)); 862 memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0])); 863 impl->curl_orients = impl->curl_orients_allocated; 864 break; 865 case CEED_OWN_POINTER: 866 impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 867 impl->curl_orients = impl->curl_orients_allocated; 868 break; 869 case CEED_USE_POINTER: 870 impl->curl_orients = curl_orients; 871 } 872 } 873 } 874 875 CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 876 CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 877 CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 878 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 879 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 880 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref)); 881 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 882 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 883 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 884 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 885 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 886 887 // Set apply function based upon num_comp, blk_size, and comp_stride 888 CeedInt idx = -1; 889 if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 890 switch (idx) { 891 case 110: 892 impl->Apply = CeedElemRestrictionApply_Ref_110; 893 break; 894 case 111: 895 impl->Apply = CeedElemRestrictionApply_Ref_111; 896 break; 897 case 180: 898 impl->Apply = CeedElemRestrictionApply_Ref_180; 899 break; 900 case 181: 901 impl->Apply = CeedElemRestrictionApply_Ref_181; 902 break; 903 case 310: 904 impl->Apply = CeedElemRestrictionApply_Ref_310; 905 break; 906 case 311: 907 impl->Apply = CeedElemRestrictionApply_Ref_311; 908 break; 909 case 380: 910 impl->Apply = CeedElemRestrictionApply_Ref_380; 911 break; 912 case 381: 913 impl->Apply = CeedElemRestrictionApply_Ref_381; 914 break; 915 // LCOV_EXCL_START 916 case 510: 917 impl->Apply = CeedElemRestrictionApply_Ref_510; 918 break; 919 // LCOV_EXCL_STOP 920 case 511: 921 impl->Apply = CeedElemRestrictionApply_Ref_511; 922 break; 923 // LCOV_EXCL_START 924 case 580: 925 impl->Apply = CeedElemRestrictionApply_Ref_580; 926 break; 927 // LCOV_EXCL_STOP 928 case 581: 929 impl->Apply = CeedElemRestrictionApply_Ref_581; 930 break; 931 default: 932 impl->Apply = CeedElemRestrictionApply_Ref_Core; 933 break; 934 } 935 936 return CEED_ERROR_SUCCESS; 937 } 938 939 //------------------------------------------------------------------------------ 940