1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <string.h> 12 13 #include "ceed-ref.h" 14 15 //------------------------------------------------------------------------------ 16 // Core ElemRestriction Apply Code 17 //------------------------------------------------------------------------------ 18 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 19 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 20 CeedRequest *request) { 21 CeedElemRestriction_Ref *impl; 22 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 23 const CeedScalar *uu; 24 CeedScalar *vv; 25 CeedInt num_elem, elem_size, v_offset; 26 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 27 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 28 v_offset = start * blk_size * elem_size * num_comp; 29 30 bool is_oriented; 31 CeedCallBackend(CeedElemRestrictionIsOriented(r, &is_oriented)); 32 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 33 if (t_mode == CEED_TRANSPOSE) { 34 // Sum into for transpose mode, e-vec to l-vec 35 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 36 } else { 37 // Overwrite for notranspose mode, l-vec to e-vec 38 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 39 } 40 // Restriction from L-vector to E-vector 41 // Perform: v = r * u 42 if (t_mode == CEED_NOTRANSPOSE) { 43 // No offsets provided, Identity Restriction 44 if (!impl->offsets) { 45 bool has_backend_strides; 46 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 47 if (has_backend_strides) { 48 // CPU backend strides are {1, elem_size, elem_size*num_comp} 49 // This if branch is left separate to allow better inlining 50 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 51 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 52 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 53 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 54 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 55 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 56 } 57 } 58 } 59 } 60 } else { 61 // User provided strides 62 CeedInt strides[3]; 63 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 64 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 65 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 66 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 67 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 68 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 69 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 70 } 71 } 72 } 73 } 74 } 75 } else { 76 // Offsets provided, standard or blocked restriction 77 // vv has shape [elem_size, num_comp, num_elem], row-major 78 // uu has shape [nnodes, num_comp] 79 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 80 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 81 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 82 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 83 uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (is_oriented && impl->orient[i + elem_size * e] ? -1. : 1.); 84 } 85 } 86 } 87 } 88 } else { 89 // Restriction from E-vector to L-vector 90 // Performing v += r^T * u 91 // No offsets provided, Identity Restriction 92 if (!impl->offsets) { 93 bool has_backend_strides; 94 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 95 if (has_backend_strides) { 96 // CPU backend strides are {1, elem_size, elem_size*num_comp} 97 // This if brach is left separate to allow better inlining 98 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 99 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 100 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 101 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 102 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 103 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 104 } 105 } 106 } 107 } 108 } else { 109 // User provided strides 110 CeedInt strides[3]; 111 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 112 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 113 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 114 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 115 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 116 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 117 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 118 } 119 } 120 } 121 } 122 } 123 } else { 124 // Offsets provided, standard or blocked restriction 125 // uu has shape [elem_size, num_comp, num_elem] 126 // vv has shape [nnodes, num_comp] 127 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 128 for (CeedInt k = 0; k < num_comp; k++) { 129 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 130 // Iteration bound set to discard padding elements 131 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 132 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 133 uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (is_oriented && impl->orient[j + e * elem_size] ? -1. : 1.); 134 } 135 } 136 } 137 } 138 } 139 } 140 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 141 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 142 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 143 return CEED_ERROR_SUCCESS; 144 } 145 146 //------------------------------------------------------------------------------ 147 // ElemRestriction Apply - Common Sizes 148 //------------------------------------------------------------------------------ 149 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 150 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 151 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, u, v, request); 152 } 153 154 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 155 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 156 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, u, v, request); 157 } 158 159 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 160 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 161 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, u, v, request); 162 } 163 164 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 165 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 166 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, u, v, request); 167 } 168 169 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 170 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 171 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, u, v, request); 172 } 173 174 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 175 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 176 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, u, v, request); 177 } 178 179 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 180 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 181 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, u, v, request); 182 } 183 184 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 185 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 186 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, u, v, request); 187 } 188 189 // LCOV_EXCL_START 190 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 191 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 192 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, u, v, request); 193 } 194 // LCOV_EXCL_STOP 195 196 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 197 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 198 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, u, v, request); 199 } 200 201 // LCOV_EXCL_START 202 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 203 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 204 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, u, v, request); 205 } 206 // LCOV_EXCL_STOP 207 208 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 209 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 210 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, u, v, request); 211 } 212 213 //------------------------------------------------------------------------------ 214 // ElemRestriction Apply 215 //------------------------------------------------------------------------------ 216 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 217 CeedInt num_blk, blk_size, num_comp, comp_stride; 218 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 219 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 220 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 221 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 222 CeedElemRestriction_Ref *impl; 223 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 224 225 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, request); 226 } 227 228 //------------------------------------------------------------------------------ 229 // ElemRestriction Apply Block 230 //------------------------------------------------------------------------------ 231 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 232 CeedRequest *request) { 233 CeedInt blk_size, num_comp, comp_stride; 234 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 235 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 236 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 237 CeedElemRestriction_Ref *impl; 238 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 239 240 return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, u, v, request); 241 } 242 243 //------------------------------------------------------------------------------ 244 // ElemRestriction Get Offsets 245 //------------------------------------------------------------------------------ 246 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 247 CeedElemRestriction_Ref *impl; 248 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 249 Ceed ceed; 250 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 251 252 if (mem_type != CEED_MEM_HOST) { 253 // LCOV_EXCL_START 254 return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 255 // LCOV_EXCL_STOP 256 } 257 258 *offsets = impl->offsets; 259 return CEED_ERROR_SUCCESS; 260 } 261 262 //------------------------------------------------------------------------------ 263 // ElemRestriction Destroy 264 //------------------------------------------------------------------------------ 265 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 266 CeedElemRestriction_Ref *impl; 267 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 268 269 CeedCallBackend(CeedFree(&impl->offsets_allocated)); 270 CeedCallBackend(CeedFree(&impl->orient_allocated)); 271 CeedCallBackend(CeedFree(&impl)); 272 return CEED_ERROR_SUCCESS; 273 } 274 275 //------------------------------------------------------------------------------ 276 // ElemRestriction Create 277 //------------------------------------------------------------------------------ 278 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, CeedElemRestriction r) { 279 CeedElemRestriction_Ref *impl; 280 CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 281 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 282 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 283 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 284 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 285 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 286 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 287 Ceed ceed; 288 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 289 290 if (mem_type != CEED_MEM_HOST) { 291 // LCOV_EXCL_START 292 return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 293 // LCOV_EXCL_STOP 294 } 295 CeedCallBackend(CeedCalloc(1, &impl)); 296 297 // Offsets data 298 bool is_strided; 299 CeedCallBackend(CeedElemRestrictionIsStrided(r, &is_strided)); 300 if (!is_strided) { 301 // Check indices for ref or memcheck backends 302 Ceed parent_ceed = ceed, curr_ceed = NULL; 303 while (parent_ceed != curr_ceed) { 304 curr_ceed = parent_ceed; 305 CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 306 } 307 const char *resource; 308 CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 309 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 310 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 311 CeedSize l_size; 312 CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 313 314 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 315 if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) { 316 // LCOV_EXCL_START 317 return CeedError(ceed, CEED_ERROR_BACKEND, "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, 318 offsets[i], l_size); 319 // LCOV_EXCL_STOP 320 } 321 } 322 } 323 324 // Copy data 325 switch (copy_mode) { 326 case CEED_COPY_VALUES: 327 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 328 memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 329 impl->offsets = impl->offsets_allocated; 330 break; 331 case CEED_OWN_POINTER: 332 impl->offsets_allocated = (CeedInt *)offsets; 333 impl->offsets = impl->offsets_allocated; 334 break; 335 case CEED_USE_POINTER: 336 impl->offsets = offsets; 337 } 338 } 339 340 CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 341 CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 342 CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 343 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 344 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 345 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 346 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 347 348 // Set apply function based upon num_comp, blk_size, and comp_stride 349 CeedInt idx = -1; 350 if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 351 switch (idx) { 352 case 110: 353 impl->Apply = CeedElemRestrictionApply_Ref_110; 354 break; 355 case 111: 356 impl->Apply = CeedElemRestrictionApply_Ref_111; 357 break; 358 case 180: 359 impl->Apply = CeedElemRestrictionApply_Ref_180; 360 break; 361 case 181: 362 impl->Apply = CeedElemRestrictionApply_Ref_181; 363 break; 364 case 310: 365 impl->Apply = CeedElemRestrictionApply_Ref_310; 366 break; 367 case 311: 368 impl->Apply = CeedElemRestrictionApply_Ref_311; 369 break; 370 case 380: 371 impl->Apply = CeedElemRestrictionApply_Ref_380; 372 break; 373 case 381: 374 impl->Apply = CeedElemRestrictionApply_Ref_381; 375 break; 376 // LCOV_EXCL_START 377 case 510: 378 impl->Apply = CeedElemRestrictionApply_Ref_510; 379 break; 380 // LCOV_EXCL_STOP 381 case 511: 382 impl->Apply = CeedElemRestrictionApply_Ref_511; 383 break; 384 // LCOV_EXCL_START 385 case 580: 386 impl->Apply = CeedElemRestrictionApply_Ref_580; 387 break; 388 // LCOV_EXCL_STOP 389 case 581: 390 impl->Apply = CeedElemRestrictionApply_Ref_581; 391 break; 392 default: 393 impl->Apply = CeedElemRestrictionApply_Ref_Core; 394 break; 395 } 396 397 return CEED_ERROR_SUCCESS; 398 } 399 400 //------------------------------------------------------------------------------ 401 // ElemRestriction Create Oriented 402 //------------------------------------------------------------------------------ 403 int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orient, 404 CeedElemRestriction r) { 405 CeedElemRestriction_Ref *impl; 406 CeedInt num_elem, elem_size; 407 // Set up for normal restriction with explicit offsets. This sets up dispatch to 408 // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation. 409 CeedCallBackend(CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r)); 410 411 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 412 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 413 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 414 switch (copy_mode) { 415 case CEED_COPY_VALUES: 416 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orient_allocated)); 417 memcpy(impl->orient_allocated, orient, num_elem * elem_size * sizeof(orient[0])); 418 impl->orient = impl->orient_allocated; 419 break; 420 case CEED_OWN_POINTER: 421 impl->orient_allocated = (bool *)orient; 422 impl->orient = impl->orient_allocated; 423 break; 424 case CEED_USE_POINTER: 425 impl->orient = orient; 426 } 427 return CEED_ERROR_SUCCESS; 428 } 429 //------------------------------------------------------------------------------ 430