1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include "ceed-ref.h" 15 16 //------------------------------------------------------------------------------ 17 // Core ElemRestriction Apply Code 18 //------------------------------------------------------------------------------ 19 static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 20 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 21 CeedInt v_offset, const CeedScalar *__restrict__ uu, 22 CeedScalar *__restrict__ vv) { 23 // No offsets provided, identity restriction 24 bool has_backend_strides; 25 26 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 27 if (has_backend_strides) { 28 // CPU backend strides are {1, elem_size, elem_size*num_comp} 29 // This if branch is left separate to allow better inlining 30 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 31 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 32 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 33 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 34 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 35 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 36 } 37 } 38 } 39 } 40 } else { 41 // User provided strides 42 CeedInt strides[3]; 43 44 CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 45 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 46 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 47 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 48 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 49 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 50 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 51 } 52 } 53 } 54 } 55 } 56 return CEED_ERROR_SUCCESS; 57 } 58 59 static inline int CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 60 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 61 CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 62 CeedScalar *__restrict__ vv) { 63 // Default restriction with offsets 64 CeedElemRestriction_Ref *impl; 65 66 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 67 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 68 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 69 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 70 vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 71 } 72 } 73 } 74 return CEED_ERROR_SUCCESS; 75 } 76 77 static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 78 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 79 CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 80 CeedScalar *__restrict__ vv) { 81 // Restriction with orientations 82 CeedElemRestriction_Ref *impl; 83 84 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 85 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 86 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 87 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 88 vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = 89 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 90 } 91 } 92 } 93 return CEED_ERROR_SUCCESS; 94 } 95 96 static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 97 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 98 CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 99 CeedScalar *__restrict__ vv) { 100 // Restriction with tridiagonal transformation 101 CeedElemRestriction_Ref *impl; 102 103 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 104 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 105 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 106 CeedInt n = 0; 107 108 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 109 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 110 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 111 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 112 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 113 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 114 } 115 CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { 116 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 117 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 118 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 119 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 120 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 121 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 122 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 123 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 124 } 125 } 126 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 127 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 128 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 129 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 130 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 131 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 132 } 133 } 134 } 135 return CEED_ERROR_SUCCESS; 136 } 137 138 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 139 const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 140 CeedInt stop, CeedInt num_elem, CeedInt elem_size, 141 CeedInt v_offset, const CeedScalar *__restrict__ uu, 142 CeedScalar *__restrict__ vv) { 143 // Restriction with (unsigned) tridiagonal transformation 144 CeedElemRestriction_Ref *impl; 145 146 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 147 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 148 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 149 CeedInt n = 0; 150 151 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 152 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 153 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 154 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 155 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 156 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 157 } 158 CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { 159 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 160 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 161 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 162 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 163 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 164 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 165 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 166 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 167 } 168 } 169 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 170 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = 171 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 172 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 173 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 174 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 175 } 176 } 177 } 178 return CEED_ERROR_SUCCESS; 179 } 180 181 static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 182 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 183 CeedInt v_offset, const CeedScalar *__restrict__ uu, 184 CeedScalar *__restrict__ vv) { 185 // No offsets provided, identity restriction 186 bool has_backend_strides; 187 188 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 189 if (has_backend_strides) { 190 // CPU backend strides are {1, elem_size, elem_size*num_comp} 191 // This if brach is left separate to allow better inlining 192 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 193 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 194 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 195 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 196 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 197 } 198 } 199 } 200 } 201 } else { 202 // User provided strides 203 CeedInt strides[3]; 204 205 CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 206 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 207 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 208 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 209 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 210 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 211 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 212 } 213 } 214 } 215 } 216 } 217 return CEED_ERROR_SUCCESS; 218 } 219 220 static inline int CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 221 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 222 CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 223 CeedScalar *__restrict__ vv) { 224 // Default restriction with offsets 225 CeedElemRestriction_Ref *impl; 226 227 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 228 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 229 for (CeedInt k = 0; k < num_comp; k++) { 230 for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 231 // Iteration bound set to discard padding elements 232 for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 233 CeedScalar vv_loc; 234 235 vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset]; 236 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 237 } 238 } 239 } 240 } 241 return CEED_ERROR_SUCCESS; 242 } 243 244 static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 245 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 246 CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 247 CeedScalar *__restrict__ vv) { 248 // Restriction with orientations 249 CeedElemRestriction_Ref *impl; 250 251 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 252 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 253 for (CeedInt k = 0; k < num_comp; k++) { 254 for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 255 // Iteration bound set to discard padding elements 256 for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 257 CeedScalar vv_loc; 258 259 vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 260 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 261 } 262 } 263 } 264 } 265 return CEED_ERROR_SUCCESS; 266 } 267 268 static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 269 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 270 CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, 271 CeedScalar *__restrict__ vv) { 272 // Restriction with tridiagonal transformation 273 CeedElemRestriction_Ref *impl; 274 CeedScalar vv_loc[block_size]; 275 276 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 277 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 278 for (CeedInt k = 0; k < num_comp; k++) { 279 // Iteration bound set to discard padding elements 280 const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 281 CeedInt n = 0; 282 283 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 284 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 285 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 286 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 287 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 288 } 289 for (CeedInt j = 0; j < block_end; j++) { 290 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 291 } 292 for (n = 1; n < elem_size - 1; n++) { 293 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 294 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 295 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 296 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 297 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 298 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 299 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 300 } 301 for (CeedInt j = 0; j < block_end; j++) { 302 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 303 } 304 } 305 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 306 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 307 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 308 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 309 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 310 } 311 for (CeedInt j = 0; j < block_end; j++) { 312 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 313 } 314 } 315 } 316 return CEED_ERROR_SUCCESS; 317 } 318 319 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 320 const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 321 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, 322 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 323 // Restriction with (unsigned) tridiagonal transformation 324 CeedElemRestriction_Ref *impl; 325 CeedScalar vv_loc[block_size]; 326 327 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 328 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 329 for (CeedInt k = 0; k < num_comp; k++) { 330 // Iteration bound set to discard padding elements 331 const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 332 CeedInt n = 0; 333 334 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 335 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 336 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 337 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 338 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 339 } 340 for (CeedInt j = 0; j < block_end; j++) { 341 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 342 } 343 for (n = 1; n < elem_size - 1; n++) { 344 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 345 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 346 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 347 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 348 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 349 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 350 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 351 } 352 for (CeedInt j = 0; j < block_end; j++) { 353 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 354 } 355 } 356 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 357 vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 358 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 359 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 360 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 361 } 362 for (CeedInt j = 0; j < block_end; j++) { 363 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 364 } 365 } 366 } 367 return CEED_ERROR_SUCCESS; 368 } 369 370 static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, CeedInt stop, 371 CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu, 372 CeedScalar *__restrict__ vv) { 373 CeedInt num_points, l_vec_offset, e_vec_offset = 0; 374 CeedElemRestriction_Ref *impl; 375 376 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 377 for (CeedInt e = start; e < stop; e++) { 378 l_vec_offset = impl->offsets[e]; 379 CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points)); 380 if (t_mode == CEED_NOTRANSPOSE) { 381 for (CeedInt i = 0; i < num_points; i++) { 382 for (CeedInt j = 0; j < num_comp; j++) vv[j * num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j]; 383 } 384 } else { 385 for (CeedInt i = 0; i < num_points; i++) { 386 for (CeedInt j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] = uu[j * num_points + i + e_vec_offset]; 387 } 388 } 389 e_vec_offset += num_points * num_comp; 390 } 391 return CEED_ERROR_SUCCESS; 392 } 393 394 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 395 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, 396 bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) { 397 CeedInt num_elem, elem_size, v_offset; 398 CeedRestrictionType rstr_type; 399 const CeedScalar *uu; 400 CeedScalar *vv; 401 402 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 403 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 404 v_offset = start * block_size * elem_size * num_comp; 405 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 406 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 407 408 if (t_mode == CEED_TRANSPOSE) { 409 // Sum into for transpose mode, E-vector to L-vector 410 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 411 } else { 412 // Overwrite for notranspose mode, L-vector to E-vector 413 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 414 } 415 if (t_mode == CEED_TRANSPOSE) { 416 // Restriction from E-vector to L-vector 417 // Performing v += r^T * u 418 // uu has shape [elem_size, num_comp, num_elem], row-major 419 // vv has shape [nnodes, num_comp] 420 // Sum into for transpose mode 421 switch (rstr_type) { 422 case CEED_RESTRICTION_STRIDED: 423 CeedCallBackend( 424 CeedElemRestrictionApplyStridedTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 425 break; 426 case CEED_RESTRICTION_STANDARD: 427 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 428 v_offset, uu, vv)); 429 break; 430 case CEED_RESTRICTION_ORIENTED: 431 if (use_signs) { 432 CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 433 elem_size, v_offset, uu, vv)); 434 } else { 435 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 436 v_offset, uu, vv)); 437 } 438 break; 439 case CEED_RESTRICTION_CURL_ORIENTED: 440 if (use_signs && use_orients) { 441 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 442 elem_size, v_offset, uu, vv)); 443 } else if (use_orients) { 444 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 445 num_elem, elem_size, v_offset, uu, vv)); 446 } else { 447 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 448 v_offset, uu, vv)); 449 } 450 break; 451 case CEED_RESTRICTION_POINTS: 452 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 453 break; 454 } 455 } else { 456 // Restriction from L-vector to E-vector 457 // Perform: v = r * u 458 // vv has shape [elem_size, num_comp, num_elem], row-major 459 // uu has shape [nnodes, num_comp] 460 // Overwrite for notranspose mode 461 switch (rstr_type) { 462 case CEED_RESTRICTION_STRIDED: 463 CeedCallBackend( 464 CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 465 break; 466 case CEED_RESTRICTION_STANDARD: 467 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 468 v_offset, uu, vv)); 469 break; 470 case CEED_RESTRICTION_ORIENTED: 471 if (use_signs) { 472 CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 473 elem_size, v_offset, uu, vv)); 474 } else { 475 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 476 elem_size, v_offset, uu, vv)); 477 } 478 break; 479 case CEED_RESTRICTION_CURL_ORIENTED: 480 if (use_signs && use_orients) { 481 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 482 elem_size, v_offset, uu, vv)); 483 } else if (use_orients) { 484 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 485 num_elem, elem_size, v_offset, uu, vv)); 486 } else { 487 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 488 elem_size, v_offset, uu, vv)); 489 } 490 break; 491 case CEED_RESTRICTION_POINTS: 492 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 493 break; 494 } 495 } 496 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 497 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 498 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 499 return CEED_ERROR_SUCCESS; 500 } 501 502 //------------------------------------------------------------------------------ 503 // ElemRestriction Apply - Common Sizes 504 //------------------------------------------------------------------------------ 505 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 506 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 507 CeedVector v, CeedRequest *request) { 508 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 509 } 510 511 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 512 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 513 CeedVector v, CeedRequest *request) { 514 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 515 } 516 517 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 518 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 519 CeedVector v, CeedRequest *request) { 520 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 521 } 522 523 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 524 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 525 CeedVector v, CeedRequest *request) { 526 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 527 } 528 529 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 530 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 531 CeedVector v, CeedRequest *request) { 532 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 533 } 534 535 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 536 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 537 CeedVector v, CeedRequest *request) { 538 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 539 } 540 541 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 542 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 543 CeedVector v, CeedRequest *request) { 544 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 545 } 546 547 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 548 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 549 CeedVector v, CeedRequest *request) { 550 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 551 } 552 553 // LCOV_EXCL_START 554 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 555 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 556 CeedVector v, CeedRequest *request) { 557 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 558 } 559 // LCOV_EXCL_STOP 560 561 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 562 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 563 CeedVector v, CeedRequest *request) { 564 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 565 } 566 567 // LCOV_EXCL_START 568 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 569 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 570 CeedVector v, CeedRequest *request) { 571 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 572 } 573 // LCOV_EXCL_STOP 574 575 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 576 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 577 CeedVector v, CeedRequest *request) { 578 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 579 } 580 581 //------------------------------------------------------------------------------ 582 // ElemRestriction Apply 583 //------------------------------------------------------------------------------ 584 static int CeedElemRestrictionApply_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 585 CeedInt num_block, block_size, num_comp, comp_stride; 586 CeedElemRestriction_Ref *impl; 587 588 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 589 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 590 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 591 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 592 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 593 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request)); 594 return CEED_ERROR_SUCCESS; 595 } 596 597 //------------------------------------------------------------------------------ 598 // ElemRestriction Apply Unsigned 599 //------------------------------------------------------------------------------ 600 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 601 CeedRequest *request) { 602 CeedInt num_block, block_size, num_comp, comp_stride; 603 CeedElemRestriction_Ref *impl; 604 605 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 606 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 607 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 608 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 609 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 610 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request)); 611 return CEED_ERROR_SUCCESS; 612 } 613 614 //------------------------------------------------------------------------------ 615 // ElemRestriction Apply Unoriented 616 //------------------------------------------------------------------------------ 617 static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 618 CeedRequest *request) { 619 CeedInt num_block, block_size, num_comp, comp_stride; 620 CeedElemRestriction_Ref *impl; 621 622 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 623 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 624 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 625 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 626 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 627 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request)); 628 return CEED_ERROR_SUCCESS; 629 } 630 631 //------------------------------------------------------------------------------ 632 // ElemRestriction Apply Points 633 //------------------------------------------------------------------------------ 634 static int CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 635 CeedRequest *request) { 636 CeedInt num_comp; 637 CeedElemRestriction_Ref *impl; 638 639 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 640 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 641 return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request); 642 } 643 644 //------------------------------------------------------------------------------ 645 // ElemRestriction Apply Block 646 //------------------------------------------------------------------------------ 647 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 648 CeedRequest *request) { 649 CeedInt block_size, num_comp, comp_stride; 650 CeedElemRestriction_Ref *impl; 651 652 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 653 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 654 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 655 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 656 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request)); 657 return CEED_ERROR_SUCCESS; 658 } 659 660 //------------------------------------------------------------------------------ 661 // ElemRestriction Get Offsets 662 //------------------------------------------------------------------------------ 663 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 664 Ceed ceed; 665 CeedElemRestriction_Ref *impl; 666 667 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 668 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 669 670 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 671 672 *offsets = impl->offsets; 673 return CEED_ERROR_SUCCESS; 674 } 675 676 //------------------------------------------------------------------------------ 677 // ElemRestriction Get Orientations 678 //------------------------------------------------------------------------------ 679 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 680 Ceed ceed; 681 CeedElemRestriction_Ref *impl; 682 683 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 684 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 685 686 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 687 688 *orients = impl->orients; 689 return CEED_ERROR_SUCCESS; 690 } 691 692 //------------------------------------------------------------------------------ 693 // ElemRestriction Get Curl-Conforming Orientations 694 //------------------------------------------------------------------------------ 695 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 696 Ceed ceed; 697 CeedElemRestriction_Ref *impl; 698 699 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 700 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 701 702 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 703 704 *curl_orients = impl->curl_orients; 705 return CEED_ERROR_SUCCESS; 706 } 707 708 //------------------------------------------------------------------------------ 709 // ElemRestriction Destroy 710 //------------------------------------------------------------------------------ 711 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr) { 712 CeedElemRestriction_Ref *impl; 713 714 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 715 CeedCallBackend(CeedFree(&impl->offsets_allocated)); 716 CeedCallBackend(CeedFree(&impl->orients_allocated)); 717 CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 718 CeedCallBackend(CeedFree(&impl)); 719 return CEED_ERROR_SUCCESS; 720 } 721 722 //------------------------------------------------------------------------------ 723 // ElemRestriction Create 724 //------------------------------------------------------------------------------ 725 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 726 const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 727 Ceed ceed; 728 CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets; 729 CeedRestrictionType rstr_type; 730 CeedElemRestriction_Ref *impl; 731 732 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 733 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 734 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 735 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 736 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 737 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 738 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 739 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 740 741 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 742 743 CeedCallBackend(CeedCalloc(1, &impl)); 744 CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 745 746 // Set layouts 747 { 748 bool has_backend_strides; 749 CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 750 751 CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 752 if (rstr_type == CEED_RESTRICTION_STRIDED) { 753 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 754 if (has_backend_strides) { 755 CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); 756 } 757 } 758 } 759 760 // Offsets data 761 if (rstr_type != CEED_RESTRICTION_STRIDED) { 762 const char *resource; 763 764 // Check indices for ref or memcheck backends 765 { 766 Ceed current = ceed, parent = NULL; 767 768 CeedCallBackend(CeedGetParent(current, &parent)); 769 while (current != parent) { 770 current = parent; 771 CeedCallBackend(CeedGetParent(current, &parent)); 772 } 773 CeedCallBackend(CeedGetResource(parent, &resource)); 774 } 775 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 776 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 777 CeedSize l_size; 778 779 CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 780 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 781 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 782 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 783 } 784 } 785 786 // Copy data 787 if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points)); 788 num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size); 789 switch (copy_mode) { 790 case CEED_COPY_VALUES: 791 CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated)); 792 memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0])); 793 impl->offsets = impl->offsets_allocated; 794 break; 795 case CEED_OWN_POINTER: 796 impl->offsets_allocated = (CeedInt *)offsets; 797 impl->offsets = impl->offsets_allocated; 798 break; 799 case CEED_USE_POINTER: 800 impl->offsets = offsets; 801 } 802 803 // Orientation data 804 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 805 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 806 switch (copy_mode) { 807 case CEED_COPY_VALUES: 808 CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated)); 809 memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0])); 810 impl->orients = impl->orients_allocated; 811 break; 812 case CEED_OWN_POINTER: 813 impl->orients_allocated = (bool *)orients; 814 impl->orients = impl->orients_allocated; 815 break; 816 case CEED_USE_POINTER: 817 impl->orients = orients; 818 } 819 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 820 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 821 switch (copy_mode) { 822 case CEED_COPY_VALUES: 823 CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated)); 824 memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0])); 825 impl->curl_orients = impl->curl_orients_allocated; 826 break; 827 case CEED_OWN_POINTER: 828 impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 829 impl->curl_orients = impl->curl_orients_allocated; 830 break; 831 case CEED_USE_POINTER: 832 impl->curl_orients = curl_orients; 833 } 834 } 835 } 836 837 // Set apply function based upon num_comp, block_size, and comp_stride 838 CeedInt index = -1; 839 840 if (block_size < 10) index = 100 * num_comp + 10 * block_size + (comp_stride == 1); 841 switch (index) { 842 case 110: 843 impl->Apply = CeedElemRestrictionApply_Ref_110; 844 break; 845 case 111: 846 impl->Apply = CeedElemRestrictionApply_Ref_111; 847 break; 848 case 180: 849 impl->Apply = CeedElemRestrictionApply_Ref_180; 850 break; 851 case 181: 852 impl->Apply = CeedElemRestrictionApply_Ref_181; 853 break; 854 case 310: 855 impl->Apply = CeedElemRestrictionApply_Ref_310; 856 break; 857 case 311: 858 impl->Apply = CeedElemRestrictionApply_Ref_311; 859 break; 860 case 380: 861 impl->Apply = CeedElemRestrictionApply_Ref_380; 862 break; 863 case 381: 864 impl->Apply = CeedElemRestrictionApply_Ref_381; 865 break; 866 // LCOV_EXCL_START 867 case 510: 868 impl->Apply = CeedElemRestrictionApply_Ref_510; 869 break; 870 // LCOV_EXCL_STOP 871 case 511: 872 impl->Apply = CeedElemRestrictionApply_Ref_511; 873 break; 874 // LCOV_EXCL_START 875 case 580: 876 impl->Apply = CeedElemRestrictionApply_Ref_580; 877 break; 878 // LCOV_EXCL_STOP 879 case 581: 880 impl->Apply = CeedElemRestrictionApply_Ref_581; 881 break; 882 default: 883 impl->Apply = CeedElemRestrictionApply_Ref_Core; 884 break; 885 } 886 887 // Register backend functions 888 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Ref)); 889 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 890 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref)); 891 if (rstr_type == CEED_RESTRICTION_POINTS) { 892 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Ref)); 893 } 894 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 895 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 896 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 897 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 898 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Ref)); 899 return CEED_ERROR_SUCCESS; 900 } 901 902 //------------------------------------------------------------------------------ 903