1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include "ceed-ref.h" 15 16 //------------------------------------------------------------------------------ 17 // Core ElemRestriction Apply Code 18 //------------------------------------------------------------------------------ 19 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 20 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, 21 CeedVector v, CeedRequest *request) { 22 CeedElemRestriction_Ref *impl; 23 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 24 const CeedScalar *uu; 25 CeedScalar *vv; 26 CeedInt num_elem, elem_size, v_offset; 27 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 28 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 29 v_offset = start * blk_size * elem_size * num_comp; 30 CeedRestrictionType rstr_type; 31 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 32 33 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 34 if (t_mode == CEED_TRANSPOSE) { 35 // Sum into for transpose mode, e-vec to l-vec 36 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 37 } else { 38 // Overwrite for notranspose mode, l-vec to e-vec 39 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 40 } 41 if (t_mode == CEED_NOTRANSPOSE) { 42 // Restriction from L-vector to E-vector 43 // Perform: v = r * u 44 // vv has shape [elem_size, num_comp, num_elem], row-major 45 // uu has shape [nnodes, num_comp] 46 switch (rstr_type) { 47 case CEED_RESTRICTION_STRIDED: { 48 // No offsets provided, Identity Restriction 49 bool has_backend_strides; 50 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 51 if (has_backend_strides) { 52 // CPU backend strides are {1, elem_size, elem_size*num_comp} 53 // This if branch is left separate to allow better inlining 54 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 55 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 56 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 57 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 58 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 59 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 60 } 61 } 62 } 63 } 64 } else { 65 // User provided strides 66 CeedInt strides[3]; 67 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 68 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 69 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 70 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 71 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 72 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 73 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 74 } 75 } 76 } 77 } 78 } 79 } break; 80 case CEED_RESTRICTION_DEFAULT: 81 // Default restriction with offsets 82 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 83 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 84 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 85 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = uu[impl->offsets[i + elem_size * e] + k * comp_stride]; 86 } 87 } 88 } 89 break; 90 case CEED_RESTRICTION_ORIENTED: 91 // Restriction with orientations 92 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 93 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 94 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 95 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 96 uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (use_orients && impl->orients[i + elem_size * e] ? -1.0 : 1.0); 97 } 98 } 99 } 100 break; 101 case CEED_RESTRICTION_CURL_ORIENTED: 102 // Restriction with tridiagonal transformation 103 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 104 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 105 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 106 CeedInt ii = 3 * blk_size * (i / blk_size) + i % blk_size; 107 if (use_orients) { 108 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 109 uu[impl->offsets[i + 0 * blk_size + elem_size * e] + k * comp_stride] * 110 impl->curl_orients[ii + 1 * blk_size + 3 * elem_size * e] + 111 (i >= blk_size ? uu[impl->offsets[i - 1 * blk_size + elem_size * e] + k * comp_stride] * 112 impl->curl_orients[ii + 0 * blk_size + 3 * elem_size * e] 113 : 0.0) + 114 (i < (elem_size - 1) * blk_size ? uu[impl->offsets[i + 1 * blk_size + elem_size * e] + k * comp_stride] * 115 impl->curl_orients[ii + 2 * blk_size + 3 * elem_size * e] 116 : 0.0); 117 } else { 118 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 119 uu[impl->offsets[i + 0 * blk_size + elem_size * e] + k * comp_stride] * 120 abs(impl->curl_orients[ii + 1 * blk_size + 3 * elem_size * e]) + 121 (i >= blk_size ? uu[impl->offsets[i - 1 * blk_size + elem_size * e] + k * comp_stride] * 122 abs(impl->curl_orients[ii + 0 * blk_size + 3 * elem_size * e]) 123 : 0.0) + 124 (i < (elem_size - 1) * blk_size ? uu[impl->offsets[i + 1 * blk_size + elem_size * e] + k * comp_stride] * 125 abs(impl->curl_orients[ii + 2 * blk_size + 3 * elem_size * e]) 126 : 0.0); 127 } 128 } 129 } 130 } 131 break; 132 } 133 } else { 134 // Restriction from E-vector to L-vector 135 // Performing v += r^T * u 136 // uu has shape [elem_size, num_comp, num_elem], row-major 137 // vv has shape [nnodes, num_comp] 138 switch (rstr_type) { 139 case CEED_RESTRICTION_STRIDED: { 140 // No offsets provided, Identity Restriction 141 bool has_backend_strides; 142 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 143 if (has_backend_strides) { 144 // CPU backend strides are {1, elem_size, elem_size*num_comp} 145 // This if brach is left separate to allow better inlining 146 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 147 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 148 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 149 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 150 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 151 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 152 } 153 } 154 } 155 } 156 } else { 157 // User provided strides 158 CeedInt strides[3]; 159 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 160 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 161 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 162 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 163 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 164 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 165 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 166 } 167 } 168 } 169 } 170 } 171 } break; 172 case CEED_RESTRICTION_DEFAULT: 173 // Default restriction with offsets 174 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 175 for (CeedInt k = 0; k < num_comp; k++) { 176 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 177 // Iteration bound set to discard padding elements 178 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 179 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset]; 180 } 181 } 182 } 183 } 184 break; 185 case CEED_RESTRICTION_ORIENTED: 186 // Restriction with orientations 187 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 188 for (CeedInt k = 0; k < num_comp; k++) { 189 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 190 // Iteration bound set to discard padding elements 191 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 192 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 193 uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (use_orients && impl->orients[j + e * elem_size] ? -1.0 : 1.0); 194 } 195 } 196 } 197 } 198 break; 199 case CEED_RESTRICTION_CURL_ORIENTED: 200 // Restriction with tridiagonal transformation 201 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 202 for (CeedInt k = 0; k < num_comp; k++) { 203 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 204 // Iteration bound set to discard padding elements 205 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 206 CeedInt jj = 3 * blk_size * (j / blk_size + 0) + j % blk_size; 207 if (use_orients) { 208 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 209 uu[elem_size * (k * blk_size + num_comp * e) + j + 0 * blk_size - v_offset] * 210 impl->curl_orients[jj + 1 * blk_size + e * 3 * elem_size] + 211 (j >= blk_size ? uu[elem_size * (k * blk_size + num_comp * e) + j - 1 * blk_size - v_offset] * 212 impl->curl_orients[jj - 1 * blk_size + e * 3 * elem_size] 213 : 0.0) + 214 (j < (elem_size - 1) * blk_size ? uu[elem_size * (k * blk_size + num_comp * e) + j + 1 * blk_size - v_offset] * 215 impl->curl_orients[jj + 3 * blk_size + e * 3 * elem_size] 216 : 0.0); 217 } else { 218 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 219 uu[elem_size * (k * blk_size + num_comp * e) + j + 0 * blk_size - v_offset] * 220 abs(impl->curl_orients[jj + 1 * blk_size + e * 3 * elem_size]) + 221 (j >= blk_size ? uu[elem_size * (k * blk_size + num_comp * e) + j - 1 * blk_size - v_offset] * 222 abs(impl->curl_orients[jj - 1 * blk_size + e * 3 * elem_size]) 223 : 0.0) + 224 (j < (elem_size - 1) * blk_size ? uu[elem_size * (k * blk_size + num_comp * e) + j + 1 * blk_size - v_offset] * 225 abs(impl->curl_orients[jj + 3 * blk_size + e * 3 * elem_size]) 226 : 0.0); 227 } 228 } 229 } 230 } 231 } 232 break; 233 } 234 } 235 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 236 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 237 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 238 return CEED_ERROR_SUCCESS; 239 } 240 241 //------------------------------------------------------------------------------ 242 // ElemRestriction Apply - Common Sizes 243 //------------------------------------------------------------------------------ 244 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 245 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 246 CeedRequest *request) { 247 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request); 248 } 249 250 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 251 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 252 CeedRequest *request) { 253 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, use_orients, t_mode, u, v, request); 254 } 255 256 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 257 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 258 CeedRequest *request) { 259 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request); 260 } 261 262 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 263 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 264 CeedRequest *request) { 265 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, use_orients, t_mode, u, v, request); 266 } 267 268 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 269 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 270 CeedRequest *request) { 271 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request); 272 } 273 274 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 275 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 276 CeedRequest *request) { 277 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, use_orients, t_mode, u, v, request); 278 } 279 280 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 281 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 282 CeedRequest *request) { 283 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request); 284 } 285 286 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 287 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 288 CeedRequest *request) { 289 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, use_orients, t_mode, u, v, request); 290 } 291 292 // LCOV_EXCL_START 293 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 294 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 295 CeedRequest *request) { 296 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request); 297 } 298 // LCOV_EXCL_STOP 299 300 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 301 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 302 CeedRequest *request) { 303 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, use_orients, t_mode, u, v, request); 304 } 305 306 // LCOV_EXCL_START 307 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 308 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 309 CeedRequest *request) { 310 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request); 311 } 312 // LCOV_EXCL_STOP 313 314 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 315 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 316 CeedRequest *request) { 317 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, use_orients, t_mode, u, v, request); 318 } 319 320 //------------------------------------------------------------------------------ 321 // ElemRestriction Apply 322 //------------------------------------------------------------------------------ 323 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 324 CeedInt num_blk, blk_size, num_comp, comp_stride; 325 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 326 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 327 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 328 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 329 CeedElemRestriction_Ref *impl; 330 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 331 332 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, true, t_mode, u, v, request); 333 } 334 335 //------------------------------------------------------------------------------ 336 // ElemRestriction Apply Unsigned 337 //------------------------------------------------------------------------------ 338 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 339 CeedInt num_blk, blk_size, num_comp, comp_stride; 340 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 341 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 342 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 343 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 344 CeedElemRestriction_Ref *impl; 345 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 346 347 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, false, t_mode, u, v, request); 348 } 349 350 //------------------------------------------------------------------------------ 351 // ElemRestriction Apply Block 352 //------------------------------------------------------------------------------ 353 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 354 CeedRequest *request) { 355 CeedInt blk_size, num_comp, comp_stride; 356 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 357 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 358 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 359 CeedElemRestriction_Ref *impl; 360 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 361 362 return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, true, t_mode, u, v, request); 363 } 364 365 //------------------------------------------------------------------------------ 366 // ElemRestriction Get Offsets 367 //------------------------------------------------------------------------------ 368 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 369 CeedElemRestriction_Ref *impl; 370 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 371 Ceed ceed; 372 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 373 374 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 375 376 *offsets = impl->offsets; 377 return CEED_ERROR_SUCCESS; 378 } 379 380 //------------------------------------------------------------------------------ 381 // ElemRestriction Get Orientations 382 //------------------------------------------------------------------------------ 383 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 384 CeedElemRestriction_Ref *impl; 385 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 386 Ceed ceed; 387 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 388 389 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 390 391 *orients = impl->orients; 392 return CEED_ERROR_SUCCESS; 393 } 394 395 //------------------------------------------------------------------------------ 396 // ElemRestriction Get Curl-Conforming Orientations 397 //------------------------------------------------------------------------------ 398 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **curl_orients) { 399 CeedElemRestriction_Ref *impl; 400 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 401 Ceed ceed; 402 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 403 404 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 405 406 *curl_orients = impl->curl_orients; 407 return CEED_ERROR_SUCCESS; 408 } 409 410 //------------------------------------------------------------------------------ 411 // ElemRestriction Destroy 412 //------------------------------------------------------------------------------ 413 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 414 CeedElemRestriction_Ref *impl; 415 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 416 417 CeedCallBackend(CeedFree(&impl->offsets_allocated)); 418 CeedCallBackend(CeedFree(&impl->orients_allocated)); 419 CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 420 CeedCallBackend(CeedFree(&impl)); 421 return CEED_ERROR_SUCCESS; 422 } 423 424 //------------------------------------------------------------------------------ 425 // ElemRestriction Create 426 //------------------------------------------------------------------------------ 427 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 428 const CeedInt *curl_orients, CeedElemRestriction r) { 429 CeedElemRestriction_Ref *impl; 430 CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 431 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 432 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 433 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 434 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 435 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 436 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 437 Ceed ceed; 438 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 439 440 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 441 CeedCallBackend(CeedCalloc(1, &impl)); 442 443 // Offsets data 444 CeedRestrictionType rstr_type; 445 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 446 if (rstr_type != CEED_RESTRICTION_STRIDED) { 447 // Check indices for ref or memcheck backends 448 Ceed parent_ceed = ceed, curr_ceed = NULL; 449 while (parent_ceed != curr_ceed) { 450 curr_ceed = parent_ceed; 451 CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 452 } 453 const char *resource; 454 CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 455 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 456 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 457 CeedSize l_size; 458 CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 459 460 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 461 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 462 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 463 } 464 } 465 466 // Copy data 467 switch (copy_mode) { 468 case CEED_COPY_VALUES: 469 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 470 memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 471 impl->offsets = impl->offsets_allocated; 472 break; 473 case CEED_OWN_POINTER: 474 impl->offsets_allocated = (CeedInt *)offsets; 475 impl->offsets = impl->offsets_allocated; 476 break; 477 case CEED_USE_POINTER: 478 impl->offsets = offsets; 479 } 480 481 // Orientation data 482 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 483 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 484 switch (copy_mode) { 485 case CEED_COPY_VALUES: 486 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated)); 487 memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0])); 488 impl->orients = impl->orients_allocated; 489 break; 490 case CEED_OWN_POINTER: 491 impl->orients_allocated = (bool *)orients; 492 impl->orients = impl->orients_allocated; 493 break; 494 case CEED_USE_POINTER: 495 impl->orients = orients; 496 } 497 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 498 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 499 switch (copy_mode) { 500 case CEED_COPY_VALUES: 501 CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated)); 502 memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0])); 503 impl->curl_orients = impl->curl_orients_allocated; 504 break; 505 case CEED_OWN_POINTER: 506 impl->curl_orients_allocated = (CeedInt *)curl_orients; 507 impl->curl_orients = impl->curl_orients_allocated; 508 break; 509 case CEED_USE_POINTER: 510 impl->curl_orients = curl_orients; 511 } 512 } 513 } 514 515 CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 516 CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 517 CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 518 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 519 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 520 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 521 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 522 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 523 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 524 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 525 526 // Set apply function based upon num_comp, blk_size, and comp_stride 527 CeedInt idx = -1; 528 if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 529 switch (idx) { 530 case 110: 531 impl->Apply = CeedElemRestrictionApply_Ref_110; 532 break; 533 case 111: 534 impl->Apply = CeedElemRestrictionApply_Ref_111; 535 break; 536 case 180: 537 impl->Apply = CeedElemRestrictionApply_Ref_180; 538 break; 539 case 181: 540 impl->Apply = CeedElemRestrictionApply_Ref_181; 541 break; 542 case 310: 543 impl->Apply = CeedElemRestrictionApply_Ref_310; 544 break; 545 case 311: 546 impl->Apply = CeedElemRestrictionApply_Ref_311; 547 break; 548 case 380: 549 impl->Apply = CeedElemRestrictionApply_Ref_380; 550 break; 551 case 381: 552 impl->Apply = CeedElemRestrictionApply_Ref_381; 553 break; 554 // LCOV_EXCL_START 555 case 510: 556 impl->Apply = CeedElemRestrictionApply_Ref_510; 557 break; 558 // LCOV_EXCL_STOP 559 case 511: 560 impl->Apply = CeedElemRestrictionApply_Ref_511; 561 break; 562 // LCOV_EXCL_START 563 case 580: 564 impl->Apply = CeedElemRestrictionApply_Ref_580; 565 break; 566 // LCOV_EXCL_STOP 567 case 581: 568 impl->Apply = CeedElemRestrictionApply_Ref_581; 569 break; 570 default: 571 impl->Apply = CeedElemRestrictionApply_Ref_Core; 572 break; 573 } 574 575 return CEED_ERROR_SUCCESS; 576 } 577 578 //------------------------------------------------------------------------------ 579