1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include "ceed-ref.h" 15 16 //------------------------------------------------------------------------------ 17 // Core ElemRestriction Apply Code 18 //------------------------------------------------------------------------------ 19 static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 20 const CeedInt start, const CeedInt stop, const CeedInt num_elem, 21 const CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 22 CeedScalar *__restrict__ vv) { 23 // No offsets provided, identity restriction 24 bool has_backend_strides; 25 26 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 27 if (has_backend_strides) { 28 // CPU backend strides are {1, elem_size, elem_size*num_comp} 29 // This if branch is left separate to allow better inlining 30 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 31 for (CeedSize k = 0; k < num_comp; k++) { 32 for (CeedSize n = 0; n < elem_size; n++) { 33 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 34 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 35 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * (CeedSize)num_comp]; 36 } 37 } 38 } 39 } 40 } else { 41 // User provided strides 42 CeedInt strides[3]; 43 44 CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 45 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 46 for (CeedSize k = 0; k < num_comp; k++) { 47 for (CeedSize n = 0; n < elem_size; n++) { 48 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 49 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 50 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * (CeedSize)strides[2]]; 51 } 52 } 53 } 54 } 55 } 56 return CEED_ERROR_SUCCESS; 57 } 58 59 static inline int CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 60 const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 61 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 62 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 63 // Default restriction with offsets 64 CeedElemRestriction_Ref *impl; 65 66 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 67 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 68 for (CeedSize k = 0; k < num_comp; k++) { 69 CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) { 70 vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 71 } 72 } 73 } 74 return CEED_ERROR_SUCCESS; 75 } 76 77 static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 78 const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 79 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 80 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 81 // Restriction with orientations 82 CeedElemRestriction_Ref *impl; 83 84 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 85 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 86 for (CeedSize k = 0; k < num_comp; k++) { 87 CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) { 88 vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = 89 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 90 } 91 } 92 } 93 return CEED_ERROR_SUCCESS; 94 } 95 96 static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 97 const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 98 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 99 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 100 // Restriction with tridiagonal transformation 101 CeedElemRestriction_Ref *impl; 102 103 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 104 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 105 for (CeedSize k = 0; k < num_comp; k++) { 106 CeedSize n = 0; 107 108 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 109 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 110 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 111 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 112 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 113 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 114 } 115 for (n = 1; n < elem_size - 1; n++) { 116 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 117 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 118 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 119 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 120 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 121 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 122 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 123 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 124 } 125 } 126 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 127 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 128 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 129 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 130 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 131 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 132 } 133 } 134 } 135 return CEED_ERROR_SUCCESS; 136 } 137 138 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 139 const CeedInt block_size, const CeedInt comp_stride, 140 const CeedInt start, const CeedInt stop, const CeedInt num_elem, 141 const CeedInt elem_size, const CeedSize v_offset, 142 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 143 // Restriction with (unsigned) tridiagonal transformation 144 CeedElemRestriction_Ref *impl; 145 146 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 147 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 148 for (CeedSize k = 0; k < num_comp; k++) { 149 CeedSize n = 0; 150 151 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 152 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 153 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 154 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 155 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 156 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 157 } 158 for (n = 1; n < elem_size - 1; n++) { 159 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 160 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 161 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 162 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 163 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 164 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 165 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 166 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 167 } 168 } 169 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { 170 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 171 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 172 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 173 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 174 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 175 } 176 } 177 } 178 return CEED_ERROR_SUCCESS; 179 } 180 181 static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 182 const CeedInt start, const CeedInt stop, const CeedInt num_elem, 183 const CeedInt elem_size, const CeedSize v_offset, 184 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 185 // No offsets provided, identity restriction 186 bool has_backend_strides; 187 188 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 189 if (has_backend_strides) { 190 // CPU backend strides are {1, elem_size, elem_size*num_comp} 191 // This if brach is left separate to allow better inlining 192 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 193 for (CeedSize k = 0; k < num_comp; k++) { 194 for (CeedSize n = 0; n < elem_size; n++) { 195 CeedPragmaSIMD for (CeedSize j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 196 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 197 } 198 } 199 } 200 } 201 } else { 202 // User provided strides 203 CeedInt strides[3]; 204 205 CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 206 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 207 for (CeedSize k = 0; k < num_comp; k++) { 208 for (CeedSize n = 0; n < elem_size; n++) { 209 CeedPragmaSIMD for (CeedSize j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 210 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 211 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 212 } 213 } 214 } 215 } 216 } 217 return CEED_ERROR_SUCCESS; 218 } 219 220 static inline int CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 221 const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 222 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 223 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 224 // Default restriction with offsets 225 CeedElemRestriction_Ref *impl; 226 227 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 228 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 229 for (CeedSize k = 0; k < num_comp; k++) { 230 for (CeedSize i = 0; i < elem_size * block_size; i += block_size) { 231 // Iteration bound set to discard padding elements 232 for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 233 CeedScalar vv_loc; 234 235 vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset]; 236 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 237 } 238 } 239 } 240 } 241 return CEED_ERROR_SUCCESS; 242 } 243 244 static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 245 const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 246 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 247 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 248 // Restriction with orientations 249 CeedElemRestriction_Ref *impl; 250 251 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 252 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 253 for (CeedSize k = 0; k < num_comp; k++) { 254 for (CeedSize i = 0; i < elem_size * block_size; i += block_size) { 255 // Iteration bound set to discard padding elements 256 for (CeedSize j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 257 CeedScalar vv_loc; 258 259 vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 260 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 261 } 262 } 263 } 264 } 265 return CEED_ERROR_SUCCESS; 266 } 267 268 static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 269 const CeedInt comp_stride, const CeedInt start, const CeedInt stop, 270 const CeedInt num_elem, const CeedInt elem_size, const CeedSize v_offset, 271 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 272 // Restriction with tridiagonal transformation 273 CeedElemRestriction_Ref *impl; 274 CeedScalar vv_loc[block_size]; 275 276 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 277 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 278 for (CeedSize k = 0; k < num_comp; k++) { 279 // Iteration bound set to discard padding elements 280 const CeedSize block_end = CeedIntMin(block_size, num_elem - e); 281 CeedSize n = 0; 282 283 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) { 284 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 285 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 286 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 287 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 288 } 289 for (CeedSize j = 0; j < block_end; j++) { 290 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 291 } 292 for (n = 1; n < elem_size - 1; n++) { 293 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 294 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 295 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 296 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 297 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 298 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 299 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 300 } 301 for (CeedSize j = 0; j < block_end; j++) { 302 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 303 } 304 } 305 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) { 306 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 307 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 308 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 309 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 310 } 311 for (CeedSize j = 0; j < block_end; j++) { 312 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 313 } 314 } 315 } 316 return CEED_ERROR_SUCCESS; 317 } 318 319 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 320 const CeedInt block_size, const CeedInt comp_stride, 321 const CeedInt start, const CeedInt stop, const CeedInt num_elem, 322 const CeedInt elem_size, const CeedSize v_offset, 323 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 324 // Restriction with (unsigned) tridiagonal transformation 325 CeedElemRestriction_Ref *impl; 326 CeedScalar vv_loc[block_size]; 327 328 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 329 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { 330 for (CeedSize k = 0; k < num_comp; k++) { 331 // Iteration bound set to discard padding elements 332 const CeedSize block_end = CeedIntMin(block_size, num_elem - e); 333 CeedSize n = 0; 334 335 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) { 336 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 337 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 338 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 339 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 340 } 341 for (CeedSize j = 0; j < block_end; j++) { 342 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 343 } 344 for (n = 1; n < elem_size - 1; n++) { 345 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) { 346 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 347 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 348 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 349 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 350 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 351 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 352 } 353 for (CeedSize j = 0; j < block_end; j++) { 354 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 355 } 356 } 357 CeedPragmaSIMD for (CeedSize j = 0; j < block_end; j++) { 358 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 359 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 360 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 361 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 362 } 363 for (CeedSize j = 0; j < block_end; j++) { 364 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 365 } 366 } 367 } 368 return CEED_ERROR_SUCCESS; 369 } 370 371 static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt start, 372 const CeedInt stop, CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu, 373 CeedScalar *__restrict__ vv) { 374 CeedInt num_points, l_vec_offset; 375 CeedSize e_vec_offset = 0; 376 CeedElemRestriction_Ref *impl; 377 378 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 379 for (CeedInt e = start; e < stop; e++) { 380 l_vec_offset = impl->offsets[e]; 381 CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points)); 382 if (t_mode == CEED_NOTRANSPOSE) { 383 for (CeedSize i = 0; i < num_points; i++) { 384 for (CeedSize j = 0; j < num_comp; j++) vv[j * num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j]; 385 } 386 } else { 387 for (CeedSize i = 0; i < num_points; i++) { 388 for (CeedSize j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] += uu[j * num_points + i + e_vec_offset]; 389 } 390 } 391 e_vec_offset += num_points * (CeedSize)num_comp; 392 } 393 return CEED_ERROR_SUCCESS; 394 } 395 396 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 397 const CeedInt comp_stride, const CeedInt start, const CeedInt stop, CeedTransposeMode t_mode, 398 bool use_signs, bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) { 399 CeedInt num_elem, elem_size; 400 CeedSize v_offset = 0; 401 CeedRestrictionType rstr_type; 402 const CeedScalar *uu; 403 CeedScalar *vv; 404 405 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 406 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 407 v_offset = start * block_size * elem_size * (CeedSize)num_comp; 408 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 409 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 410 411 if (t_mode == CEED_TRANSPOSE) { 412 // Sum into for transpose mode, E-vector to L-vector 413 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 414 } else { 415 // Overwrite for notranspose mode, L-vector to E-vector 416 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 417 } 418 if (t_mode == CEED_TRANSPOSE) { 419 // Restriction from E-vector to L-vector 420 // Performing v += r^T * u 421 // uu has shape [elem_size, num_comp, num_elem], row-major 422 // vv has shape [nnodes, num_comp] 423 // Sum into for transpose mode 424 switch (rstr_type) { 425 case CEED_RESTRICTION_STRIDED: 426 CeedCallBackend(CeedElemRestrictionApplyStridedTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, 427 vv)); 428 break; 429 case CEED_RESTRICTION_STANDARD: 430 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 431 v_offset, uu, vv)); 432 break; 433 case CEED_RESTRICTION_ORIENTED: 434 if (use_signs) { 435 CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 436 elem_size, v_offset, uu, vv)); 437 } else { 438 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 439 v_offset, uu, vv)); 440 } 441 break; 442 case CEED_RESTRICTION_CURL_ORIENTED: 443 if (use_signs && use_orients) { 444 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 445 elem_size, v_offset, uu, vv)); 446 } else if (use_orients) { 447 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 448 num_elem, elem_size, v_offset, uu, vv)); 449 } else { 450 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 451 v_offset, uu, vv)); 452 } 453 break; 454 case CEED_RESTRICTION_POINTS: 455 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 456 break; 457 } 458 } else { 459 // Restriction from L-vector to E-vector 460 // Perform: v = r * u 461 // vv has shape [elem_size, num_comp, num_elem], row-major 462 // uu has shape [nnodes, num_comp] 463 // Overwrite for notranspose mode 464 switch (rstr_type) { 465 case CEED_RESTRICTION_STRIDED: 466 CeedCallBackend(CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, 467 uu, vv)); 468 break; 469 case CEED_RESTRICTION_STANDARD: 470 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 471 v_offset, uu, vv)); 472 break; 473 case CEED_RESTRICTION_ORIENTED: 474 if (use_signs) { 475 CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 476 elem_size, v_offset, uu, vv)); 477 } else { 478 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 479 elem_size, v_offset, uu, vv)); 480 } 481 break; 482 case CEED_RESTRICTION_CURL_ORIENTED: 483 if (use_signs && use_orients) { 484 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 485 elem_size, v_offset, uu, vv)); 486 } else if (use_orients) { 487 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 488 num_elem, elem_size, v_offset, uu, vv)); 489 } else { 490 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 491 elem_size, v_offset, uu, vv)); 492 } 493 break; 494 case CEED_RESTRICTION_POINTS: 495 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 496 break; 497 } 498 } 499 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 500 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 501 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 502 return CEED_ERROR_SUCCESS; 503 } 504 505 //------------------------------------------------------------------------------ 506 // ElemRestriction Apply - Common Sizes 507 //------------------------------------------------------------------------------ 508 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 509 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 510 CeedVector v, CeedRequest *request) { 511 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 512 } 513 514 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 515 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 516 CeedVector v, CeedRequest *request) { 517 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 518 } 519 520 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 521 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 522 CeedVector v, CeedRequest *request) { 523 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 524 } 525 526 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 527 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 528 CeedVector v, CeedRequest *request) { 529 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 530 } 531 532 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 533 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 534 CeedVector v, CeedRequest *request) { 535 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 536 } 537 538 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 539 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 540 CeedVector v, CeedRequest *request) { 541 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 542 } 543 544 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 545 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 546 CeedVector v, CeedRequest *request) { 547 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 548 } 549 550 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 551 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 552 CeedVector v, CeedRequest *request) { 553 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 554 } 555 556 // LCOV_EXCL_START 557 static int CeedElemRestrictionApply_Ref_410(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 558 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 559 CeedVector v, CeedRequest *request) { 560 return CeedElemRestrictionApply_Ref_Core(rstr, 4, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 561 } 562 563 static int CeedElemRestrictionApply_Ref_411(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 564 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 565 CeedVector v, CeedRequest *request) { 566 return CeedElemRestrictionApply_Ref_Core(rstr, 4, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 567 } 568 569 static int CeedElemRestrictionApply_Ref_480(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 570 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 571 CeedVector v, CeedRequest *request) { 572 return CeedElemRestrictionApply_Ref_Core(rstr, 4, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 573 } 574 575 static int CeedElemRestrictionApply_Ref_481(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 576 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 577 CeedVector v, CeedRequest *request) { 578 return CeedElemRestrictionApply_Ref_Core(rstr, 4, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 579 } 580 581 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 582 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 583 CeedVector v, CeedRequest *request) { 584 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 585 } 586 // LCOV_EXCL_STOP 587 588 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 589 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 590 CeedVector v, CeedRequest *request) { 591 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 592 } 593 594 // LCOV_EXCL_START 595 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 596 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 597 CeedVector v, CeedRequest *request) { 598 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 599 } 600 // LCOV_EXCL_STOP 601 602 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 603 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 604 CeedVector v, CeedRequest *request) { 605 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 606 } 607 608 //------------------------------------------------------------------------------ 609 // ElemRestriction Apply 610 //------------------------------------------------------------------------------ 611 static int CeedElemRestrictionApply_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 612 CeedInt num_block, block_size, num_comp, comp_stride; 613 CeedElemRestriction_Ref *impl; 614 615 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 616 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 617 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 618 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 619 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 620 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request)); 621 return CEED_ERROR_SUCCESS; 622 } 623 624 //------------------------------------------------------------------------------ 625 // ElemRestriction Apply Unsigned 626 //------------------------------------------------------------------------------ 627 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 628 CeedRequest *request) { 629 CeedInt num_block, block_size, num_comp, comp_stride; 630 CeedElemRestriction_Ref *impl; 631 632 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 633 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 634 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 635 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 636 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 637 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request)); 638 return CEED_ERROR_SUCCESS; 639 } 640 641 //------------------------------------------------------------------------------ 642 // ElemRestriction Apply Unoriented 643 //------------------------------------------------------------------------------ 644 static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 645 CeedRequest *request) { 646 CeedInt num_block, block_size, num_comp, comp_stride; 647 CeedElemRestriction_Ref *impl; 648 649 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 650 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 651 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 652 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 653 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 654 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request)); 655 return CEED_ERROR_SUCCESS; 656 } 657 658 //------------------------------------------------------------------------------ 659 // ElemRestriction Apply Points 660 //------------------------------------------------------------------------------ 661 static int CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 662 CeedRequest *request) { 663 CeedInt num_comp; 664 CeedElemRestriction_Ref *impl; 665 666 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 667 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 668 return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request); 669 } 670 671 //------------------------------------------------------------------------------ 672 // ElemRestriction Apply Block 673 //------------------------------------------------------------------------------ 674 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 675 CeedRequest *request) { 676 CeedInt block_size, num_comp, comp_stride; 677 CeedElemRestriction_Ref *impl; 678 679 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 680 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 681 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 682 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 683 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request)); 684 return CEED_ERROR_SUCCESS; 685 } 686 687 //------------------------------------------------------------------------------ 688 // ElemRestriction Get Offsets 689 //------------------------------------------------------------------------------ 690 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 691 CeedElemRestriction_Ref *impl; 692 693 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 694 695 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 696 697 *offsets = impl->offsets; 698 return CEED_ERROR_SUCCESS; 699 } 700 701 //------------------------------------------------------------------------------ 702 // ElemRestriction Get Orientations 703 //------------------------------------------------------------------------------ 704 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 705 CeedElemRestriction_Ref *impl; 706 707 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 708 709 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 710 711 *orients = impl->orients; 712 return CEED_ERROR_SUCCESS; 713 } 714 715 //------------------------------------------------------------------------------ 716 // ElemRestriction Get Curl-Conforming Orientations 717 //------------------------------------------------------------------------------ 718 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 719 CeedElemRestriction_Ref *impl; 720 721 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 722 723 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 724 725 *curl_orients = impl->curl_orients; 726 return CEED_ERROR_SUCCESS; 727 } 728 729 //------------------------------------------------------------------------------ 730 // ElemRestriction Destroy 731 //------------------------------------------------------------------------------ 732 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr) { 733 CeedElemRestriction_Ref *impl; 734 735 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 736 CeedCallBackend(CeedFree(&impl->offsets_owned)); 737 CeedCallBackend(CeedFree(&impl->orients_owned)); 738 CeedCallBackend(CeedFree(&impl->curl_orients_owned)); 739 CeedCallBackend(CeedFree(&impl)); 740 return CEED_ERROR_SUCCESS; 741 } 742 743 //------------------------------------------------------------------------------ 744 // ElemRestriction Create 745 //------------------------------------------------------------------------------ 746 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 747 const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 748 Ceed ceed; 749 CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets; 750 CeedRestrictionType rstr_type; 751 CeedElemRestriction_Ref *impl; 752 753 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 754 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 755 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 756 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 757 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 758 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 759 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 760 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 761 762 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 763 764 CeedCallBackend(CeedCalloc(1, &impl)); 765 CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 766 767 // Set layouts 768 { 769 bool has_backend_strides; 770 CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 771 772 CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 773 if (rstr_type == CEED_RESTRICTION_STRIDED) { 774 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 775 if (has_backend_strides) { 776 CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); 777 } 778 } 779 } 780 781 // Expand E-vector size for AtPoints 782 if (rstr_type == CEED_RESTRICTION_POINTS) { 783 CeedSize max_points = 0, num_points_total = 0; 784 785 for (CeedInt i = 0; i < num_elem; i++) { 786 CeedInt num_points = offsets[i + 1] - offsets[i]; 787 788 max_points = CeedIntMax(max_points, num_points); 789 num_points_total += num_points; 790 } 791 // -- Increase size for last element 792 num_points_total += (max_points - (offsets[num_elem] - offsets[num_elem - 1])); 793 CeedCallBackend(CeedElemRestrictionSetAtPointsEVectorSize(rstr, num_points_total * num_comp)); 794 } 795 796 // Offsets data 797 if (rstr_type != CEED_RESTRICTION_STRIDED) { 798 const char *resource; 799 800 // Check indices for ref or memcheck backends 801 { 802 Ceed current = ceed, ceed_parent = NULL; 803 804 CeedCallBackend(CeedGetParent(current, &ceed_parent)); 805 CeedCallBackend(CeedGetResource(ceed_parent, &resource)); 806 CeedCallBackend(CeedDestroy(&ceed_parent)); 807 } 808 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked")) { 809 CeedSize l_size; 810 811 CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 812 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 813 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 814 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 815 } 816 } 817 818 // Copy data 819 if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points)); 820 num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size); 821 CeedCallBackend(CeedSetHostCeedIntArray(offsets, copy_mode, num_offsets, &impl->offsets_owned, &impl->offsets_borrowed, &impl->offsets)); 822 823 // Orientation data 824 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 825 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 826 CeedCallBackend(CeedSetHostBoolArray(orients, copy_mode, num_offsets, &impl->orients_owned, &impl->orients_borrowed, &impl->orients)); 827 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 828 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 829 CeedCallBackend(CeedSetHostCeedInt8Array(curl_orients, copy_mode, 3 * num_offsets, &impl->curl_orients_owned, &impl->curl_orients_borrowed, 830 &impl->curl_orients)); 831 } 832 } 833 834 // Set apply function based upon num_comp, block_size, and comp_stride 835 CeedInt index = -1; 836 837 if (block_size < 10) index = 100 * num_comp + 10 * block_size + (comp_stride == 1); 838 switch (index) { 839 case 110: 840 impl->Apply = CeedElemRestrictionApply_Ref_110; 841 break; 842 case 111: 843 impl->Apply = CeedElemRestrictionApply_Ref_111; 844 break; 845 case 180: 846 impl->Apply = CeedElemRestrictionApply_Ref_180; 847 break; 848 case 181: 849 impl->Apply = CeedElemRestrictionApply_Ref_181; 850 break; 851 case 310: 852 impl->Apply = CeedElemRestrictionApply_Ref_310; 853 break; 854 case 311: 855 impl->Apply = CeedElemRestrictionApply_Ref_311; 856 break; 857 case 380: 858 impl->Apply = CeedElemRestrictionApply_Ref_380; 859 break; 860 case 381: 861 impl->Apply = CeedElemRestrictionApply_Ref_381; 862 break; 863 // LCOV_EXCL_START 864 case 410: 865 impl->Apply = CeedElemRestrictionApply_Ref_410; 866 break; 867 case 411: 868 impl->Apply = CeedElemRestrictionApply_Ref_411; 869 break; 870 case 480: 871 impl->Apply = CeedElemRestrictionApply_Ref_480; 872 break; 873 case 481: 874 impl->Apply = CeedElemRestrictionApply_Ref_481; 875 break; 876 case 510: 877 impl->Apply = CeedElemRestrictionApply_Ref_510; 878 break; 879 // LCOV_EXCL_STOP 880 case 511: 881 impl->Apply = CeedElemRestrictionApply_Ref_511; 882 break; 883 // LCOV_EXCL_START 884 case 580: 885 impl->Apply = CeedElemRestrictionApply_Ref_580; 886 break; 887 // LCOV_EXCL_STOP 888 case 581: 889 impl->Apply = CeedElemRestrictionApply_Ref_581; 890 break; 891 default: 892 impl->Apply = CeedElemRestrictionApply_Ref_Core; 893 break; 894 } 895 896 // Register backend functions 897 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Ref)); 898 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 899 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref)); 900 if (rstr_type == CEED_RESTRICTION_POINTS) { 901 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Ref)); 902 } 903 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 904 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 905 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 906 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 907 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Ref)); 908 CeedCallBackend(CeedDestroy(&ceed)); 909 return CEED_ERROR_SUCCESS; 910 } 911 912 //------------------------------------------------------------------------------ 913