1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <string.h> 12 13 #include "ceed-ref.h" 14 15 //------------------------------------------------------------------------------ 16 // Core ElemRestriction Apply Code 17 //------------------------------------------------------------------------------ 18 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 19 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 20 CeedRequest *request) { 21 CeedElemRestriction_Ref *impl; 22 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 23 const CeedScalar *uu; 24 CeedScalar *vv; 25 CeedInt num_elem, elem_size, v_offset; 26 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 27 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 28 v_offset = start * blk_size * elem_size * num_comp; 29 30 bool is_oriented; 31 CeedCallBackend(CeedElemRestrictionIsOriented(r, &is_oriented)); 32 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 33 if (t_mode == CEED_TRANSPOSE) { 34 // Sum into for transpose mode, e-vec to l-vec 35 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 36 } else { 37 // Overwrite for notranspose mode, l-vec to e-vec 38 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 39 } 40 // Restriction from L-vector to E-vector 41 // Perform: v = r * u 42 if (t_mode == CEED_NOTRANSPOSE) { 43 // No offsets provided, Identity Restriction 44 if (!impl->offsets) { 45 bool has_backend_strides; 46 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 47 if (has_backend_strides) { 48 // CPU backend strides are {1, elem_size, elem_size*num_comp} 49 // This if branch is left separate to allow better inlining 50 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 51 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 52 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 53 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 54 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 55 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 56 } 57 } 58 } 59 } 60 } else { 61 // User provided strides 62 CeedInt strides[3]; 63 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 64 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 65 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 66 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 67 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 68 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 69 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 70 } 71 } 72 } 73 } 74 } 75 } else { 76 // Offsets provided, standard or blocked restriction 77 // vv has shape [elem_size, num_comp, num_elem], row-major 78 // uu has shape [nnodes, num_comp] 79 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 80 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 81 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 82 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 83 uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (is_oriented && impl->orient[i + elem_size * e] ? -1. : 1.); 84 } 85 } 86 } 87 } 88 } else { 89 // Restriction from E-vector to L-vector 90 // Performing v += r^T * u 91 // No offsets provided, Identity Restriction 92 if (!impl->offsets) { 93 bool has_backend_strides; 94 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 95 if (has_backend_strides) { 96 // CPU backend strides are {1, elem_size, elem_size*num_comp} 97 // This if brach is left separate to allow better inlining 98 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 99 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 100 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 101 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 102 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 103 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 104 } 105 } 106 } 107 } 108 } else { 109 // User provided strides 110 CeedInt strides[3]; 111 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 112 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 113 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 114 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 115 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 116 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 117 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 118 } 119 } 120 } 121 } 122 } 123 } else { 124 // Offsets provided, standard or blocked restriction 125 // uu has shape [elem_size, num_comp, num_elem] 126 // vv has shape [nnodes, num_comp] 127 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 128 for (CeedInt k = 0; k < num_comp; k++) { 129 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 130 // Iteration bound set to discard padding elements 131 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 132 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 133 uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (is_oriented && impl->orient[j + e * elem_size] ? -1. : 1.); 134 } 135 } 136 } 137 } 138 } 139 } 140 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 141 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 142 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 143 return CEED_ERROR_SUCCESS; 144 } 145 146 //------------------------------------------------------------------------------ 147 // ElemRestriction Apply - Common Sizes 148 //------------------------------------------------------------------------------ 149 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 150 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 151 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, u, v, request); 152 } 153 154 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 155 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 156 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, u, v, request); 157 } 158 159 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 160 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 161 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, u, v, request); 162 } 163 164 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 165 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 166 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, u, v, request); 167 } 168 169 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 170 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 171 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, u, v, request); 172 } 173 174 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 175 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 176 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, u, v, request); 177 } 178 179 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 180 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 181 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, u, v, request); 182 } 183 184 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 185 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 186 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, u, v, request); 187 } 188 189 // LCOV_EXCL_START 190 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 191 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 192 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, u, v, request); 193 } 194 // LCOV_EXCL_STOP 195 196 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 197 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 198 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, u, v, request); 199 } 200 201 // LCOV_EXCL_START 202 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 203 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 204 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, u, v, request); 205 } 206 // LCOV_EXCL_STOP 207 208 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 209 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 210 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, u, v, request); 211 } 212 213 //------------------------------------------------------------------------------ 214 // ElemRestriction Apply 215 //------------------------------------------------------------------------------ 216 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 217 CeedInt num_blk, blk_size, num_comp, comp_stride; 218 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 219 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 220 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 221 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 222 CeedElemRestriction_Ref *impl; 223 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 224 225 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, request); 226 } 227 228 //------------------------------------------------------------------------------ 229 // ElemRestriction Apply Block 230 //------------------------------------------------------------------------------ 231 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 232 CeedRequest *request) { 233 CeedInt blk_size, num_comp, comp_stride; 234 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 235 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 236 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 237 CeedElemRestriction_Ref *impl; 238 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 239 240 return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, u, v, request); 241 } 242 243 //------------------------------------------------------------------------------ 244 // ElemRestriction Get Offsets 245 //------------------------------------------------------------------------------ 246 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 247 CeedElemRestriction_Ref *impl; 248 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 249 Ceed ceed; 250 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 251 252 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 253 254 *offsets = impl->offsets; 255 return CEED_ERROR_SUCCESS; 256 } 257 258 //------------------------------------------------------------------------------ 259 // ElemRestriction Destroy 260 //------------------------------------------------------------------------------ 261 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 262 CeedElemRestriction_Ref *impl; 263 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 264 265 CeedCallBackend(CeedFree(&impl->offsets_allocated)); 266 CeedCallBackend(CeedFree(&impl->orient_allocated)); 267 CeedCallBackend(CeedFree(&impl)); 268 return CEED_ERROR_SUCCESS; 269 } 270 271 //------------------------------------------------------------------------------ 272 // ElemRestriction Create 273 //------------------------------------------------------------------------------ 274 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, CeedElemRestriction r) { 275 CeedElemRestriction_Ref *impl; 276 CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 277 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 278 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 279 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 280 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 281 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 282 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 283 Ceed ceed; 284 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 285 286 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 287 CeedCallBackend(CeedCalloc(1, &impl)); 288 289 // Offsets data 290 bool is_strided; 291 CeedCallBackend(CeedElemRestrictionIsStrided(r, &is_strided)); 292 if (!is_strided) { 293 // Check indices for ref or memcheck backends 294 Ceed parent_ceed = ceed, curr_ceed = NULL; 295 while (parent_ceed != curr_ceed) { 296 curr_ceed = parent_ceed; 297 CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 298 } 299 const char *resource; 300 CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 301 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 302 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 303 CeedSize l_size; 304 CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 305 306 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 307 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 308 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 309 } 310 } 311 312 // Copy data 313 switch (copy_mode) { 314 case CEED_COPY_VALUES: 315 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 316 memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 317 impl->offsets = impl->offsets_allocated; 318 break; 319 case CEED_OWN_POINTER: 320 impl->offsets_allocated = (CeedInt *)offsets; 321 impl->offsets = impl->offsets_allocated; 322 break; 323 case CEED_USE_POINTER: 324 impl->offsets = offsets; 325 } 326 } 327 328 CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 329 CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 330 CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 331 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 332 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 333 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 334 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 335 336 // Set apply function based upon num_comp, blk_size, and comp_stride 337 CeedInt idx = -1; 338 if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 339 switch (idx) { 340 case 110: 341 impl->Apply = CeedElemRestrictionApply_Ref_110; 342 break; 343 case 111: 344 impl->Apply = CeedElemRestrictionApply_Ref_111; 345 break; 346 case 180: 347 impl->Apply = CeedElemRestrictionApply_Ref_180; 348 break; 349 case 181: 350 impl->Apply = CeedElemRestrictionApply_Ref_181; 351 break; 352 case 310: 353 impl->Apply = CeedElemRestrictionApply_Ref_310; 354 break; 355 case 311: 356 impl->Apply = CeedElemRestrictionApply_Ref_311; 357 break; 358 case 380: 359 impl->Apply = CeedElemRestrictionApply_Ref_380; 360 break; 361 case 381: 362 impl->Apply = CeedElemRestrictionApply_Ref_381; 363 break; 364 // LCOV_EXCL_START 365 case 510: 366 impl->Apply = CeedElemRestrictionApply_Ref_510; 367 break; 368 // LCOV_EXCL_STOP 369 case 511: 370 impl->Apply = CeedElemRestrictionApply_Ref_511; 371 break; 372 // LCOV_EXCL_START 373 case 580: 374 impl->Apply = CeedElemRestrictionApply_Ref_580; 375 break; 376 // LCOV_EXCL_STOP 377 case 581: 378 impl->Apply = CeedElemRestrictionApply_Ref_581; 379 break; 380 default: 381 impl->Apply = CeedElemRestrictionApply_Ref_Core; 382 break; 383 } 384 385 return CEED_ERROR_SUCCESS; 386 } 387 388 //------------------------------------------------------------------------------ 389 // ElemRestriction Create Oriented 390 //------------------------------------------------------------------------------ 391 int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orient, 392 CeedElemRestriction r) { 393 CeedElemRestriction_Ref *impl; 394 CeedInt num_elem, elem_size; 395 // Set up for normal restriction with explicit offsets. This sets up dispatch to 396 // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation. 397 CeedCallBackend(CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r)); 398 399 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 400 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 401 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 402 switch (copy_mode) { 403 case CEED_COPY_VALUES: 404 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orient_allocated)); 405 memcpy(impl->orient_allocated, orient, num_elem * elem_size * sizeof(orient[0])); 406 impl->orient = impl->orient_allocated; 407 break; 408 case CEED_OWN_POINTER: 409 impl->orient_allocated = (bool *)orient; 410 impl->orient = impl->orient_allocated; 411 break; 412 case CEED_USE_POINTER: 413 impl->orient = orient; 414 } 415 return CEED_ERROR_SUCCESS; 416 } 417 418 //------------------------------------------------------------------------------ 419