1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <string.h> 21 #include "ceed-ref.h" 22 23 //------------------------------------------------------------------------------ 24 // Core ElemRestriction Apply Code 25 //------------------------------------------------------------------------------ 26 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 27 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 28 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 29 CeedVector v, CeedRequest *request) { 30 int ierr; 31 CeedElemRestriction_Ref *impl; 32 ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 33 const CeedScalar *uu; 34 CeedScalar *vv; 35 CeedInt num_elem, elem_size, v_offset; 36 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 37 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 38 v_offset = start*blk_size*elem_size*num_comp; 39 40 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr); 41 if (t_mode == CEED_TRANSPOSE) { 42 // Sum into for transpose mode, e-vec to l-vec 43 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 44 } else { 45 // Overwrite for notranspose mode, l-vec to e-vec 46 ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 47 } 48 // Restriction from L-vector to E-vector 49 // Perform: v = r * u 50 if (t_mode == CEED_NOTRANSPOSE) { 51 // No offsets provided, Identity Restriction 52 if (!impl->offsets) { 53 bool has_backend_strides; 54 ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 55 CeedChkBackend(ierr); 56 if (has_backend_strides) { 57 // CPU backend strides are {1, elem_size, elem_size*num_comp} 58 // This if branch is left separate to allow better inlining 59 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 60 CeedPragmaSIMD 61 for (CeedInt k = 0; k < num_comp; k++) 62 CeedPragmaSIMD 63 for (CeedInt n = 0; n < elem_size; n++) 64 CeedPragmaSIMD 65 for (CeedInt j = 0; j < blk_size; j++) 66 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 67 = uu[n + k*elem_size + 68 CeedIntMin(e+j, num_elem-1)*elem_size*num_comp]; 69 } else { 70 // User provided strides 71 CeedInt strides[3]; 72 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 73 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 74 CeedPragmaSIMD 75 for (CeedInt k = 0; k < num_comp; k++) 76 CeedPragmaSIMD 77 for (CeedInt n = 0; n < elem_size; n++) 78 CeedPragmaSIMD 79 for (CeedInt j = 0; j < blk_size; j++) 80 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 81 = uu[n*strides[0] + k*strides[1] + 82 CeedIntMin(e+j, num_elem-1)*strides[2]]; 83 } 84 } else { 85 // Offsets provided, standard or blocked restriction 86 // vv has shape [elem_size, num_comp, num_elem], row-major 87 // uu has shape [nnodes, num_comp] 88 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 89 CeedPragmaSIMD 90 for (CeedInt k = 0; k < num_comp; k++) 91 CeedPragmaSIMD 92 for (CeedInt i = 0; i < elem_size*blk_size; i++) 93 vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] 94 = uu[impl->offsets[i+elem_size*e] + k*comp_stride]; 95 } 96 } else { 97 // Restriction from E-vector to L-vector 98 // Performing v += r^T * u 99 // No offsets provided, Identity Restriction 100 if (!impl->offsets) { 101 bool has_backend_strides; 102 ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 103 CeedChkBackend(ierr); 104 if (has_backend_strides) { 105 // CPU backend strides are {1, elem_size, elem_size*num_comp} 106 // This if brach is left separate to allow better inlining 107 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 108 CeedPragmaSIMD 109 for (CeedInt k = 0; k < num_comp; k++) 110 CeedPragmaSIMD 111 for (CeedInt n = 0; n < elem_size; n++) 112 CeedPragmaSIMD 113 for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 114 vv[n + k*elem_size + (e+j)*elem_size*num_comp] 115 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 116 } else { 117 // User provided strides 118 CeedInt strides[3]; 119 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 120 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 121 CeedPragmaSIMD 122 for (CeedInt k = 0; k < num_comp; k++) 123 CeedPragmaSIMD 124 for (CeedInt n = 0; n < elem_size; n++) 125 CeedPragmaSIMD 126 for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 127 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 128 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 129 } 130 } else { 131 // Offsets provided, standard or blocked restriction 132 // uu has shape [elem_size, num_comp, num_elem] 133 // vv has shape [nnodes, num_comp] 134 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 135 for (CeedInt k = 0; k < num_comp; k++) 136 for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) 137 // Iteration bound set to discard padding elements 138 for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) 139 vv[impl->offsets[j+e*elem_size] + k*comp_stride] 140 += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset]; 141 } 142 } 143 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr); 144 ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr); 145 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 146 *request = NULL; 147 return CEED_ERROR_SUCCESS; 148 } 149 150 //------------------------------------------------------------------------------ 151 // ElemRestriction Apply - Common Sizes 152 //------------------------------------------------------------------------------ 153 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, 154 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 155 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 156 CeedVector v, CeedRequest *request) { 157 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, 158 t_mode, u, v, request); 159 } 160 161 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, 162 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 163 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 164 CeedVector v, CeedRequest *request) { 165 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, 166 u, v, request); 167 } 168 169 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, 170 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 171 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 172 CeedVector v, CeedRequest *request) { 173 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, 174 t_mode, u, v, request); 175 } 176 177 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, 178 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 179 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 180 CeedVector v, CeedRequest *request) { 181 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, 182 u, v, request); 183 } 184 185 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, 186 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 187 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 188 CeedVector v, CeedRequest *request) { 189 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, 190 t_mode, u, v, request); 191 } 192 193 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, 194 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 195 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 196 CeedVector v, CeedRequest *request) { 197 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, 198 u, v, request); 199 } 200 201 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, 202 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 203 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 204 CeedVector v, CeedRequest *request) { 205 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, 206 t_mode, u, v, request); 207 } 208 209 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, 210 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 211 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 212 CeedVector v, CeedRequest *request) { 213 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, 214 u, v, request); 215 } 216 217 // LCOV_EXCL_START 218 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, 219 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 220 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 221 CeedVector v, CeedRequest *request) { 222 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, 223 t_mode, u, v, request); 224 } 225 // LCOV_EXCL_STOP 226 227 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, 228 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 229 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 230 CeedVector v, CeedRequest *request) { 231 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, 232 u, v, request); 233 } 234 235 // LCOV_EXCL_START 236 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, 237 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 238 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 239 CeedVector v, CeedRequest *request) { 240 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, 241 t_mode, u, v, request); 242 } 243 // LCOV_EXCL_STOP 244 245 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, 246 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 247 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 248 CeedVector v, CeedRequest *request) { 249 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, 250 u, v, request); 251 } 252 253 //------------------------------------------------------------------------------ 254 // ElemRestriction Apply 255 //------------------------------------------------------------------------------ 256 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 257 CeedTransposeMode t_mode, CeedVector u, 258 CeedVector v, CeedRequest *request) { 259 int ierr; 260 CeedInt num_blk, blk_size, num_comp, comp_stride; 261 ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 262 ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 263 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 264 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 265 CeedElemRestriction_Ref *impl; 266 ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 267 268 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, 269 request); 270 } 271 272 //------------------------------------------------------------------------------ 273 // ElemRestriction Apply Block 274 //------------------------------------------------------------------------------ 275 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 276 CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 277 CeedRequest *request) { 278 int ierr; 279 CeedInt blk_size, num_comp, comp_stride; 280 ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 281 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 282 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 283 CeedElemRestriction_Ref *impl; 284 ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 285 286 return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode, 287 u, v, request); 288 } 289 290 //------------------------------------------------------------------------------ 291 // ElemRestriction Get Offsets 292 //------------------------------------------------------------------------------ 293 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, 294 CeedMemType mem_type, const CeedInt **offsets) { 295 int ierr; 296 CeedElemRestriction_Ref *impl; 297 ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr); 298 Ceed ceed; 299 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr); 300 301 if (mem_type != CEED_MEM_HOST) 302 // LCOV_EXCL_START 303 return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 304 // LCOV_EXCL_STOP 305 306 *offsets = impl->offsets; 307 return CEED_ERROR_SUCCESS; 308 } 309 310 //------------------------------------------------------------------------------ 311 // ElemRestriction Destroy 312 //------------------------------------------------------------------------------ 313 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 314 int ierr; 315 CeedElemRestriction_Ref *impl; 316 ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 317 318 ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr); 319 ierr = CeedFree(&impl); CeedChkBackend(ierr); 320 return CEED_ERROR_SUCCESS; 321 } 322 323 //------------------------------------------------------------------------------ 324 // ElemRestriction Create 325 //------------------------------------------------------------------------------ 326 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, 327 const CeedInt *offsets, 328 CeedElemRestriction r) { 329 int ierr; 330 CeedElemRestriction_Ref *impl; 331 CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 332 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 333 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 334 ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 335 ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 336 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 337 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 338 Ceed ceed; 339 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 340 341 if (mem_type != CEED_MEM_HOST) 342 // LCOV_EXCL_START 343 return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 344 // LCOV_EXCL_STOP 345 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 346 347 // Offsets data 348 bool is_strided; 349 ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr); 350 if (!is_strided) { 351 // Check indices for ref or memcheck backends 352 Ceed parent_ceed = ceed, curr_ceed = NULL; 353 while (parent_ceed != curr_ceed) { 354 curr_ceed = parent_ceed; 355 ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr); 356 } 357 const char *resource; 358 ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr); 359 if (!strcmp(resource, "/cpu/self/ref/serial") || 360 !strcmp(resource, "/cpu/self/ref/blocked") || 361 !strcmp(resource, "/cpu/self/memcheck/serial") || 362 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 363 CeedInt l_size; 364 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 365 366 for (CeedInt i = 0; i < num_elem*elem_size; i++) 367 if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) 368 // LCOV_EXCL_START 369 return CeedError(ceed, CEED_ERROR_BACKEND, 370 "Restriction offset %d (%d) out of range " 371 "[0, %d]", i, offsets[i], l_size); 372 // LCOV_EXCL_STOP 373 } 374 375 // Copy data 376 switch (copy_mode) { 377 case CEED_COPY_VALUES: 378 ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated); 379 CeedChkBackend(ierr); 380 memcpy(impl->offsets_allocated, offsets, 381 num_elem * elem_size * sizeof(offsets[0])); 382 impl->offsets = impl->offsets_allocated; 383 break; 384 case CEED_OWN_POINTER: 385 impl->offsets_allocated = (CeedInt *)offsets; 386 impl->offsets = impl->offsets_allocated; 387 break; 388 case CEED_USE_POINTER: 389 impl->offsets = offsets; 390 } 391 } 392 393 ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr); 394 CeedInt layout[3] = {1, elem_size, elem_size*num_comp}; 395 ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr); 396 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 397 CeedElemRestrictionApply_Ref); CeedChkBackend(ierr); 398 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 399 CeedElemRestrictionApplyBlock_Ref); 400 CeedChkBackend(ierr); 401 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", 402 CeedElemRestrictionGetOffsets_Ref); 403 CeedChkBackend(ierr); 404 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 405 CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr); 406 407 // Set apply function based upon num_comp, blk_size, and comp_stride 408 CeedInt idx = -1; 409 if (blk_size < 10) 410 idx = 100*num_comp + 10*blk_size + (comp_stride == 1); 411 switch (idx) { 412 case 110: 413 impl->Apply = CeedElemRestrictionApply_Ref_110; 414 break; 415 case 111: 416 impl->Apply = CeedElemRestrictionApply_Ref_111; 417 break; 418 case 180: 419 impl->Apply = CeedElemRestrictionApply_Ref_180; 420 break; 421 case 181: 422 impl->Apply = CeedElemRestrictionApply_Ref_181; 423 break; 424 case 310: 425 impl->Apply = CeedElemRestrictionApply_Ref_310; 426 break; 427 case 311: 428 impl->Apply = CeedElemRestrictionApply_Ref_311; 429 break; 430 case 380: 431 impl->Apply = CeedElemRestrictionApply_Ref_380; 432 break; 433 case 381: 434 impl->Apply = CeedElemRestrictionApply_Ref_381; 435 break; 436 // LCOV_EXCL_START 437 case 510: 438 impl->Apply = CeedElemRestrictionApply_Ref_510; 439 break; 440 // LCOV_EXCL_STOP 441 case 511: 442 impl->Apply = CeedElemRestrictionApply_Ref_511; 443 break; 444 // LCOV_EXCL_START 445 case 580: 446 impl->Apply = CeedElemRestrictionApply_Ref_580; 447 break; 448 // LCOV_EXCL_STOP 449 case 581: 450 impl->Apply = CeedElemRestrictionApply_Ref_581; 451 break; 452 default: 453 impl->Apply = CeedElemRestrictionApply_Ref_Core; 454 break; 455 } 456 457 return CEED_ERROR_SUCCESS; 458 } 459 //------------------------------------------------------------------------------ 460