1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include "ceed-ref.h" 15 16 //------------------------------------------------------------------------------ 17 // Core ElemRestriction Apply Code 18 //------------------------------------------------------------------------------ 19 static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 20 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 21 CeedSize v_offset, const CeedScalar *__restrict__ uu, 22 CeedScalar *__restrict__ vv) { 23 // No offsets provided, identity restriction 24 bool has_backend_strides; 25 26 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 27 if (has_backend_strides) { 28 // CPU backend strides are {1, elem_size, elem_size*num_comp} 29 // This if branch is left separate to allow better inlining 30 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 31 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 32 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 33 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 34 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 35 uu[n + k * (CeedSize)elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * (CeedSize)num_comp]; 36 } 37 } 38 } 39 } 40 } else { 41 // User provided strides 42 CeedInt strides[3]; 43 44 CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 45 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 46 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 47 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 48 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 49 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 50 uu[n * (CeedSize)strides[0] + k * (CeedSize)strides[1] + CeedIntMin(e + j, num_elem - 1) * (CeedSize)strides[2]]; 51 } 52 } 53 } 54 } 55 } 56 return CEED_ERROR_SUCCESS; 57 } 58 59 static inline int CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 60 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 61 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 62 CeedScalar *__restrict__ vv) { 63 // Default restriction with offsets 64 CeedElemRestriction_Ref *impl; 65 66 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 67 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 68 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 69 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 70 vv[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 71 } 72 } 73 } 74 return CEED_ERROR_SUCCESS; 75 } 76 77 static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 78 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 79 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 80 CeedScalar *__restrict__ vv) { 81 // Restriction with orientations 82 CeedElemRestriction_Ref *impl; 83 84 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 85 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 86 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 87 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 88 vv[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + i - v_offset] = 89 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 90 } 91 } 92 } 93 return CEED_ERROR_SUCCESS; 94 } 95 96 static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 97 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 98 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 99 CeedScalar *__restrict__ vv) { 100 // Restriction with tridiagonal transformation 101 CeedElemRestriction_Ref *impl; 102 103 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 104 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 105 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 106 CeedInt n = 0; 107 108 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 109 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 110 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 111 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 112 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 113 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 114 } 115 CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { 116 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 117 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 118 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 119 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 120 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 121 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 122 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 123 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 124 } 125 } 126 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 127 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 128 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 129 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 130 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 131 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 132 } 133 } 134 } 135 return CEED_ERROR_SUCCESS; 136 } 137 138 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 139 const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 140 CeedInt stop, CeedInt num_elem, CeedInt elem_size, 141 CeedSize v_offset, const CeedScalar *__restrict__ uu, 142 CeedScalar *__restrict__ vv) { 143 // Restriction with (unsigned) tridiagonal transformation 144 CeedElemRestriction_Ref *impl; 145 146 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 147 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 148 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 149 CeedInt n = 0; 150 151 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 152 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 153 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 154 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 155 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 156 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 157 } 158 CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { 159 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 160 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 161 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 162 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 163 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 164 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 165 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 166 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 167 } 168 } 169 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 170 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 171 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 172 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 173 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 174 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 175 } 176 } 177 } 178 return CEED_ERROR_SUCCESS; 179 } 180 181 static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 182 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 183 CeedSize v_offset, const CeedScalar *__restrict__ uu, 184 CeedScalar *__restrict__ vv) { 185 // No offsets provided, identity restriction 186 bool has_backend_strides; 187 188 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 189 if (has_backend_strides) { 190 // CPU backend strides are {1, elem_size, elem_size*num_comp} 191 // This if brach is left separate to allow better inlining 192 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 193 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 194 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 195 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 196 vv[n + k * (CeedSize)elem_size + (e + j) * elem_size * (CeedSize)num_comp] += 197 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset]; 198 } 199 } 200 } 201 } 202 } else { 203 // User provided strides 204 CeedInt strides[3]; 205 206 CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 207 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 208 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 209 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 210 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 211 vv[n * (CeedSize)strides[0] + k * (CeedSize)strides[1] + (e + j) * (CeedSize)strides[2]] += 212 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset]; 213 } 214 } 215 } 216 } 217 } 218 return CEED_ERROR_SUCCESS; 219 } 220 221 static inline int CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 222 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 223 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 224 CeedScalar *__restrict__ vv) { 225 // Default restriction with offsets 226 CeedElemRestriction_Ref *impl; 227 228 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 229 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 230 for (CeedInt k = 0; k < num_comp; k++) { 231 for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 232 // Iteration bound set to discard padding elements 233 for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 234 CeedScalar vv_loc; 235 236 vv_loc = uu[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + j - v_offset]; 237 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 238 } 239 } 240 } 241 } 242 return CEED_ERROR_SUCCESS; 243 } 244 245 static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 246 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 247 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 248 CeedScalar *__restrict__ vv) { 249 // Restriction with orientations 250 CeedElemRestriction_Ref *impl; 251 252 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 253 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 254 for (CeedInt k = 0; k < num_comp; k++) { 255 for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 256 // Iteration bound set to discard padding elements 257 for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 258 CeedScalar vv_loc; 259 260 vv_loc = 261 uu[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 262 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 263 } 264 } 265 } 266 } 267 return CEED_ERROR_SUCCESS; 268 } 269 270 static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 271 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 272 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 273 CeedScalar *__restrict__ vv) { 274 // Restriction with tridiagonal transformation 275 CeedElemRestriction_Ref *impl; 276 CeedScalar vv_loc[block_size]; 277 278 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 279 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 280 for (CeedInt k = 0; k < num_comp; k++) { 281 // Iteration bound set to discard padding elements 282 const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 283 CeedInt n = 0; 284 285 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 286 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 287 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 288 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 289 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 290 } 291 for (CeedInt j = 0; j < block_end; j++) { 292 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 293 } 294 for (n = 1; n < elem_size - 1; n++) { 295 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 296 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 297 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 298 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 299 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 300 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 301 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 302 } 303 for (CeedInt j = 0; j < block_end; j++) { 304 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 305 } 306 } 307 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 308 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 309 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 310 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 311 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 312 } 313 for (CeedInt j = 0; j < block_end; j++) { 314 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 315 } 316 } 317 } 318 return CEED_ERROR_SUCCESS; 319 } 320 321 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 322 const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 323 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset, 324 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 325 // Restriction with (unsigned) tridiagonal transformation 326 CeedElemRestriction_Ref *impl; 327 CeedScalar vv_loc[block_size]; 328 329 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 330 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 331 for (CeedInt k = 0; k < num_comp; k++) { 332 // Iteration bound set to discard padding elements 333 const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 334 CeedInt n = 0; 335 336 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 337 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 338 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 339 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 340 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 341 } 342 for (CeedInt j = 0; j < block_end; j++) { 343 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 344 } 345 for (n = 1; n < elem_size - 1; n++) { 346 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 347 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 348 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 349 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 350 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 351 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 352 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 353 } 354 for (CeedInt j = 0; j < block_end; j++) { 355 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 356 } 357 } 358 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 359 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 360 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 361 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 362 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 363 } 364 for (CeedInt j = 0; j < block_end; j++) { 365 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 366 } 367 } 368 } 369 return CEED_ERROR_SUCCESS; 370 } 371 372 static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, CeedInt stop, 373 CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu, 374 CeedScalar *__restrict__ vv) { 375 CeedInt num_points, l_vec_offset; 376 CeedSize e_vec_offset = 0; 377 CeedElemRestriction_Ref *impl; 378 379 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 380 for (CeedInt e = start; e < stop; e++) { 381 l_vec_offset = impl->offsets[e]; 382 CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points)); 383 if (t_mode == CEED_NOTRANSPOSE) { 384 for (CeedInt i = 0; i < num_points; i++) { 385 for (CeedInt j = 0; j < num_comp; j++) vv[j * (CeedSize)num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j]; 386 } 387 } else { 388 for (CeedInt i = 0; i < num_points; i++) { 389 for (CeedInt j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] = uu[j * (CeedSize)num_points + i + e_vec_offset]; 390 } 391 } 392 e_vec_offset += num_points * (CeedSize)num_comp; 393 } 394 return CEED_ERROR_SUCCESS; 395 } 396 397 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 398 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, 399 bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) { 400 CeedInt num_elem, elem_size; 401 CeedSize v_offset = 0; 402 CeedRestrictionType rstr_type; 403 const CeedScalar *uu; 404 CeedScalar *vv; 405 406 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 407 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 408 v_offset = start * block_size * elem_size * (CeedSize)num_comp; 409 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 410 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 411 412 if (t_mode == CEED_TRANSPOSE) { 413 // Sum into for transpose mode, E-vector to L-vector 414 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 415 } else { 416 // Overwrite for notranspose mode, L-vector to E-vector 417 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 418 } 419 if (t_mode == CEED_TRANSPOSE) { 420 // Restriction from E-vector to L-vector 421 // Performing v += r^T * u 422 // uu has shape [elem_size, num_comp, num_elem], row-major 423 // vv has shape [nnodes, num_comp] 424 // Sum into for transpose mode 425 switch (rstr_type) { 426 case CEED_RESTRICTION_STRIDED: 427 CeedCallBackend( 428 CeedElemRestrictionApplyStridedTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 429 break; 430 case CEED_RESTRICTION_STANDARD: 431 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 432 v_offset, uu, vv)); 433 break; 434 case CEED_RESTRICTION_ORIENTED: 435 if (use_signs) { 436 CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 437 elem_size, v_offset, uu, vv)); 438 } else { 439 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 440 v_offset, uu, vv)); 441 } 442 break; 443 case CEED_RESTRICTION_CURL_ORIENTED: 444 if (use_signs && use_orients) { 445 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 446 elem_size, v_offset, uu, vv)); 447 } else if (use_orients) { 448 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 449 num_elem, elem_size, v_offset, uu, vv)); 450 } else { 451 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 452 v_offset, uu, vv)); 453 } 454 break; 455 case CEED_RESTRICTION_POINTS: 456 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 457 break; 458 } 459 } else { 460 // Restriction from L-vector to E-vector 461 // Perform: v = r * u 462 // vv has shape [elem_size, num_comp, num_elem], row-major 463 // uu has shape [nnodes, num_comp] 464 // Overwrite for notranspose mode 465 switch (rstr_type) { 466 case CEED_RESTRICTION_STRIDED: 467 CeedCallBackend( 468 CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 469 break; 470 case CEED_RESTRICTION_STANDARD: 471 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, elem_size, 472 v_offset, uu, vv)); 473 break; 474 case CEED_RESTRICTION_ORIENTED: 475 if (use_signs) { 476 CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 477 elem_size, v_offset, uu, vv)); 478 } else { 479 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 480 elem_size, v_offset, uu, vv)); 481 } 482 break; 483 case CEED_RESTRICTION_CURL_ORIENTED: 484 if (use_signs && use_orients) { 485 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 486 elem_size, v_offset, uu, vv)); 487 } else if (use_orients) { 488 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, 489 num_elem, elem_size, v_offset, uu, vv)); 490 } else { 491 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 492 elem_size, v_offset, uu, vv)); 493 } 494 break; 495 case CEED_RESTRICTION_POINTS: 496 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Ref_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 497 break; 498 } 499 } 500 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 501 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 502 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 503 return CEED_ERROR_SUCCESS; 504 } 505 506 //------------------------------------------------------------------------------ 507 // ElemRestriction Apply - Common Sizes 508 //------------------------------------------------------------------------------ 509 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 510 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 511 CeedVector v, CeedRequest *request) { 512 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 513 } 514 515 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 516 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 517 CeedVector v, CeedRequest *request) { 518 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 519 } 520 521 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 522 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 523 CeedVector v, CeedRequest *request) { 524 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 525 } 526 527 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 528 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 529 CeedVector v, CeedRequest *request) { 530 return CeedElemRestrictionApply_Ref_Core(rstr, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 531 } 532 533 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 534 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 535 CeedVector v, CeedRequest *request) { 536 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 537 } 538 539 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 540 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 541 CeedVector v, CeedRequest *request) { 542 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 543 } 544 545 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 546 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 547 CeedVector v, CeedRequest *request) { 548 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 549 } 550 551 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 552 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 553 CeedVector v, CeedRequest *request) { 554 return CeedElemRestrictionApply_Ref_Core(rstr, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 555 } 556 557 // LCOV_EXCL_START 558 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 559 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 560 CeedVector v, CeedRequest *request) { 561 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 562 } 563 // LCOV_EXCL_STOP 564 565 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 566 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 567 CeedVector v, CeedRequest *request) { 568 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 569 } 570 571 // LCOV_EXCL_START 572 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 573 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 574 CeedVector v, CeedRequest *request) { 575 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 576 } 577 // LCOV_EXCL_STOP 578 579 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, 580 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 581 CeedVector v, CeedRequest *request) { 582 return CeedElemRestrictionApply_Ref_Core(rstr, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 583 } 584 585 //------------------------------------------------------------------------------ 586 // ElemRestriction Apply 587 //------------------------------------------------------------------------------ 588 static int CeedElemRestrictionApply_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 589 CeedInt num_block, block_size, num_comp, comp_stride; 590 CeedElemRestriction_Ref *impl; 591 592 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 593 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 594 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 595 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 596 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 597 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request)); 598 return CEED_ERROR_SUCCESS; 599 } 600 601 //------------------------------------------------------------------------------ 602 // ElemRestriction Apply Unsigned 603 //------------------------------------------------------------------------------ 604 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 605 CeedRequest *request) { 606 CeedInt num_block, block_size, num_comp, comp_stride; 607 CeedElemRestriction_Ref *impl; 608 609 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 610 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 611 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 612 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 613 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 614 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request)); 615 return CEED_ERROR_SUCCESS; 616 } 617 618 //------------------------------------------------------------------------------ 619 // ElemRestriction Apply Unoriented 620 //------------------------------------------------------------------------------ 621 static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 622 CeedRequest *request) { 623 CeedInt num_block, block_size, num_comp, comp_stride; 624 CeedElemRestriction_Ref *impl; 625 626 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 627 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 628 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 629 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 630 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 631 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request)); 632 return CEED_ERROR_SUCCESS; 633 } 634 635 //------------------------------------------------------------------------------ 636 // ElemRestriction Apply Points 637 //------------------------------------------------------------------------------ 638 static int CeedElemRestrictionApplyAtPointsInElement_Ref(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 639 CeedRequest *request) { 640 CeedInt num_comp; 641 CeedElemRestriction_Ref *impl; 642 643 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 644 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 645 return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request); 646 } 647 648 //------------------------------------------------------------------------------ 649 // ElemRestriction Apply Block 650 //------------------------------------------------------------------------------ 651 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 652 CeedRequest *request) { 653 CeedInt block_size, num_comp, comp_stride; 654 CeedElemRestriction_Ref *impl; 655 656 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 657 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 658 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 659 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 660 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request)); 661 return CEED_ERROR_SUCCESS; 662 } 663 664 //------------------------------------------------------------------------------ 665 // ElemRestriction Get Offsets 666 //------------------------------------------------------------------------------ 667 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 668 CeedElemRestriction_Ref *impl; 669 670 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 671 672 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 673 674 *offsets = impl->offsets; 675 return CEED_ERROR_SUCCESS; 676 } 677 678 //------------------------------------------------------------------------------ 679 // ElemRestriction Get Orientations 680 //------------------------------------------------------------------------------ 681 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 682 CeedElemRestriction_Ref *impl; 683 684 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 685 686 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 687 688 *orients = impl->orients; 689 return CEED_ERROR_SUCCESS; 690 } 691 692 //------------------------------------------------------------------------------ 693 // ElemRestriction Get Curl-Conforming Orientations 694 //------------------------------------------------------------------------------ 695 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 696 CeedElemRestriction_Ref *impl; 697 698 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 699 700 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 701 702 *curl_orients = impl->curl_orients; 703 return CEED_ERROR_SUCCESS; 704 } 705 706 //------------------------------------------------------------------------------ 707 // ElemRestriction Destroy 708 //------------------------------------------------------------------------------ 709 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction rstr) { 710 CeedElemRestriction_Ref *impl; 711 712 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 713 CeedCallBackend(CeedFree(&impl->offsets_allocated)); 714 CeedCallBackend(CeedFree(&impl->orients_allocated)); 715 CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 716 CeedCallBackend(CeedFree(&impl)); 717 return CEED_ERROR_SUCCESS; 718 } 719 720 //------------------------------------------------------------------------------ 721 // ElemRestriction Create 722 //------------------------------------------------------------------------------ 723 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 724 const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 725 Ceed ceed; 726 CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets; 727 CeedRestrictionType rstr_type; 728 CeedElemRestriction_Ref *impl; 729 730 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 731 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 732 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 733 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 734 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 735 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 736 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 737 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 738 739 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 740 741 CeedCallBackend(CeedCalloc(1, &impl)); 742 CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 743 744 // Set layouts 745 { 746 bool has_backend_strides; 747 CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 748 749 CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 750 if (rstr_type == CEED_RESTRICTION_STRIDED) { 751 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 752 if (has_backend_strides) { 753 CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); 754 } 755 } 756 } 757 758 // Offsets data 759 if (rstr_type != CEED_RESTRICTION_STRIDED) { 760 const char *resource; 761 762 // Check indices for ref or memcheck backends 763 { 764 Ceed current = ceed, parent = NULL; 765 766 CeedCallBackend(CeedGetParent(current, &parent)); 767 while (current != parent) { 768 current = parent; 769 CeedCallBackend(CeedGetParent(current, &parent)); 770 } 771 CeedCallBackend(CeedGetResource(parent, &resource)); 772 } 773 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 774 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 775 CeedSize l_size; 776 777 CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 778 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 779 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 780 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 781 } 782 } 783 784 // Copy data 785 if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points)); 786 num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size); 787 switch (copy_mode) { 788 case CEED_COPY_VALUES: 789 CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated)); 790 memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0])); 791 impl->offsets = impl->offsets_allocated; 792 break; 793 case CEED_OWN_POINTER: 794 impl->offsets_allocated = (CeedInt *)offsets; 795 impl->offsets = impl->offsets_allocated; 796 break; 797 case CEED_USE_POINTER: 798 impl->offsets = offsets; 799 } 800 801 // Orientation data 802 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 803 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 804 switch (copy_mode) { 805 case CEED_COPY_VALUES: 806 CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated)); 807 memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0])); 808 impl->orients = impl->orients_allocated; 809 break; 810 case CEED_OWN_POINTER: 811 impl->orients_allocated = (bool *)orients; 812 impl->orients = impl->orients_allocated; 813 break; 814 case CEED_USE_POINTER: 815 impl->orients = orients; 816 } 817 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 818 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 819 switch (copy_mode) { 820 case CEED_COPY_VALUES: 821 CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated)); 822 memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0])); 823 impl->curl_orients = impl->curl_orients_allocated; 824 break; 825 case CEED_OWN_POINTER: 826 impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 827 impl->curl_orients = impl->curl_orients_allocated; 828 break; 829 case CEED_USE_POINTER: 830 impl->curl_orients = curl_orients; 831 } 832 } 833 } 834 835 // Set apply function based upon num_comp, block_size, and comp_stride 836 CeedInt index = -1; 837 838 if (block_size < 10) index = 100 * num_comp + 10 * block_size + (comp_stride == 1); 839 switch (index) { 840 case 110: 841 impl->Apply = CeedElemRestrictionApply_Ref_110; 842 break; 843 case 111: 844 impl->Apply = CeedElemRestrictionApply_Ref_111; 845 break; 846 case 180: 847 impl->Apply = CeedElemRestrictionApply_Ref_180; 848 break; 849 case 181: 850 impl->Apply = CeedElemRestrictionApply_Ref_181; 851 break; 852 case 310: 853 impl->Apply = CeedElemRestrictionApply_Ref_310; 854 break; 855 case 311: 856 impl->Apply = CeedElemRestrictionApply_Ref_311; 857 break; 858 case 380: 859 impl->Apply = CeedElemRestrictionApply_Ref_380; 860 break; 861 case 381: 862 impl->Apply = CeedElemRestrictionApply_Ref_381; 863 break; 864 // LCOV_EXCL_START 865 case 510: 866 impl->Apply = CeedElemRestrictionApply_Ref_510; 867 break; 868 // LCOV_EXCL_STOP 869 case 511: 870 impl->Apply = CeedElemRestrictionApply_Ref_511; 871 break; 872 // LCOV_EXCL_START 873 case 580: 874 impl->Apply = CeedElemRestrictionApply_Ref_580; 875 break; 876 // LCOV_EXCL_STOP 877 case 581: 878 impl->Apply = CeedElemRestrictionApply_Ref_581; 879 break; 880 default: 881 impl->Apply = CeedElemRestrictionApply_Ref_Core; 882 break; 883 } 884 885 // Register backend functions 886 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Ref)); 887 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 888 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref)); 889 if (rstr_type == CEED_RESTRICTION_POINTS) { 890 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Ref)); 891 } 892 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 893 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 894 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 895 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 896 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Ref)); 897 return CEED_ERROR_SUCCESS; 898 } 899 900 //------------------------------------------------------------------------------ 901