1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-ref.h" 18 19 //------------------------------------------------------------------------------ 20 // Core ElemRestriction Apply Code 21 //------------------------------------------------------------------------------ 22 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 23 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 24 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 25 CeedVector v, CeedRequest *request) { 26 int ierr; 27 CeedElemRestriction_Ref *impl; 28 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 29 const CeedScalar *uu; 30 CeedScalar *vv; 31 CeedInt nelem, elemsize, voffset; 32 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 33 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 34 voffset = start*blksize*elemsize*ncomp; 35 36 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 37 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 38 // Restriction from L-vector to E-vector 39 // Perform: v = r * u 40 if (tmode == CEED_NOTRANSPOSE) { 41 // No offsets provided, Identity Restriction 42 if (!impl->offsets) { 43 bool backendstrides; 44 ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides); 45 CeedChk(ierr); 46 if (backendstrides) { 47 // CPU backend strides are {1, elemsize, elemsize*ncomp} 48 // This if branch is left separate to allow better inlining 49 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 50 CeedPragmaSIMD 51 for (CeedInt k = 0; k < ncomp; k++) 52 CeedPragmaSIMD 53 for (CeedInt n = 0; n < elemsize; n++) 54 CeedPragmaSIMD 55 for (CeedInt j = 0; j < blksize; j++) 56 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset] 57 = uu[n + k*elemsize + 58 CeedIntMin(e+j, nelem-1)*elemsize*ncomp]; 59 } else { 60 // User provided strides 61 CeedInt strides[3]; 62 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 63 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 64 CeedPragmaSIMD 65 for (CeedInt k = 0; k < ncomp; k++) 66 CeedPragmaSIMD 67 for (CeedInt n = 0; n < elemsize; n++) 68 CeedPragmaSIMD 69 for (CeedInt j = 0; j < blksize; j++) 70 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset] 71 = uu[n*strides[0] + k*strides[1] + 72 CeedIntMin(e+j, nelem-1)*strides[2]]; 73 } 74 } else { 75 // Offsets provided, standard or blocked restriction 76 // vv has shape [elemsize, ncomp, nelem], row-major 77 // uu has shape [nnodes, ncomp] 78 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 79 CeedPragmaSIMD 80 for (CeedInt k = 0; k < ncomp; k++) 81 CeedPragmaSIMD 82 for (CeedInt i = 0; i < elemsize*blksize; i++) 83 vv[elemsize*(k*blksize+ncomp*e) + i - voffset] 84 = uu[impl->offsets[i+elemsize*e] + k*compstride]; 85 } 86 } else { 87 // Restriction from E-vector to L-vector 88 // Performing v += r^T * u 89 // No offsets provided, Identity Restriction 90 if (!impl->offsets) { 91 bool backendstrides; 92 ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides); 93 CeedChk(ierr); 94 if (backendstrides) { 95 // CPU backend strides are {1, elemsize, elemsize*ncomp} 96 // This if brach is left separate to allow better inlining 97 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 98 CeedPragmaSIMD 99 for (CeedInt k = 0; k < ncomp; k++) 100 CeedPragmaSIMD 101 for (CeedInt n = 0; n < elemsize; n++) 102 CeedPragmaSIMD 103 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 104 vv[n + k*elemsize + (e+j)*elemsize*ncomp] 105 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]; 106 } else { 107 // User provided strides 108 CeedInt strides[3]; 109 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 110 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 111 CeedPragmaSIMD 112 for (CeedInt k = 0; k < ncomp; k++) 113 CeedPragmaSIMD 114 for (CeedInt n = 0; n < elemsize; n++) 115 CeedPragmaSIMD 116 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 117 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 118 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]; 119 } 120 } else { 121 // Offsets provided, standard or blocked restriction 122 // uu has shape [elemsize, ncomp, nelem] 123 // vv has shape [nnodes, ncomp] 124 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 125 for (CeedInt k = 0; k < ncomp; k++) 126 for (CeedInt i = 0; i < elemsize*blksize; i+=blksize) 127 // Iteration bound set to discard padding elements 128 for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++) 129 vv[impl->offsets[j+e*elemsize] + k*compstride] 130 += uu[elemsize*(k*blksize+ncomp*e) + j - voffset]; 131 } 132 } 133 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 134 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 135 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 136 *request = NULL; 137 return 0; 138 } 139 140 //------------------------------------------------------------------------------ 141 // ElemRestriction Apply - Common Sizes 142 //------------------------------------------------------------------------------ 143 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, 144 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 145 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 146 CeedVector v, CeedRequest *request) { 147 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, compstride, start, stop, 148 tmode, u, v, request); 149 } 150 151 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, 152 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 153 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 154 CeedVector v, CeedRequest *request) { 155 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, tmode, 156 u, v, request); 157 } 158 159 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, 160 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 161 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 162 CeedVector v, CeedRequest *request) { 163 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, compstride, start, stop, 164 tmode, u, v, request); 165 } 166 167 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, 168 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 169 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 170 CeedVector v, CeedRequest *request) { 171 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, tmode, 172 u, v, request); 173 } 174 175 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, 176 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 177 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 178 CeedVector v, CeedRequest *request) { 179 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, compstride, start, stop, 180 tmode, u, v, request); 181 } 182 183 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, 184 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 185 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 186 CeedVector v, CeedRequest *request) { 187 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, tmode, 188 u, v, request); 189 } 190 191 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, 192 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 193 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 194 CeedVector v, CeedRequest *request) { 195 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, compstride, start, stop, 196 tmode, u, v, request); 197 } 198 199 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, 200 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 201 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 202 CeedVector v, CeedRequest *request) { 203 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, tmode, 204 u, v, request); 205 } 206 207 // LCOV_EXCL_START 208 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, 209 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 210 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 211 CeedVector v, CeedRequest *request) { 212 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, compstride, start, stop, 213 tmode, u, v, request); 214 } 215 // LCOV_EXCL_STOP 216 217 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, 218 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 219 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 220 CeedVector v, CeedRequest *request) { 221 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, tmode, 222 u, v, request); 223 } 224 225 // LCOV_EXCL_START 226 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, 227 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 228 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 229 CeedVector v, CeedRequest *request) { 230 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, compstride, start, stop, 231 tmode, u, v, request); 232 } 233 // LCOV_EXCL_STOP 234 235 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, 236 const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 237 CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 238 CeedVector v, CeedRequest *request) { 239 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, tmode, 240 u, v, request); 241 } 242 243 //------------------------------------------------------------------------------ 244 // ElemRestriction Apply 245 //------------------------------------------------------------------------------ 246 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 247 CeedTransposeMode tmode, CeedVector u, 248 CeedVector v, CeedRequest *request) { 249 int ierr; 250 CeedInt numblk, blksize, ncomp, compstride; 251 ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 252 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 253 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 254 ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr); 255 CeedElemRestriction_Ref *impl; 256 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 257 258 return impl->Apply(r, ncomp, blksize, compstride, 0, numblk, tmode, u, v, 259 request); 260 } 261 262 //------------------------------------------------------------------------------ 263 // ElemRestriction Apply Block 264 //------------------------------------------------------------------------------ 265 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 266 CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v, 267 CeedRequest *request) { 268 int ierr; 269 CeedInt blksize, ncomp, compstride; 270 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 271 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 272 ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr); 273 CeedElemRestriction_Ref *impl; 274 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 275 276 return impl->Apply(r, ncomp, blksize, compstride, block, block+1, tmode, u, v, 277 request); 278 } 279 280 //------------------------------------------------------------------------------ 281 // ElemRestriction Get Offsets 282 //------------------------------------------------------------------------------ 283 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, 284 CeedMemType mtype, const CeedInt **offsets) { 285 int ierr; 286 CeedElemRestriction_Ref *impl; 287 ierr = CeedElemRestrictionGetData(rstr, (void *)&impl); CeedChk(ierr); 288 Ceed ceed; 289 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 290 291 if (mtype != CEED_MEM_HOST) 292 // LCOV_EXCL_START 293 return CeedError(ceed, 1, "Can only provide to HOST memory"); 294 // LCOV_EXCL_STOP 295 296 *offsets = impl->offsets; 297 return 0; 298 } 299 300 //------------------------------------------------------------------------------ 301 // ElemRestriction Destroy 302 //------------------------------------------------------------------------------ 303 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 304 int ierr; 305 CeedElemRestriction_Ref *impl; 306 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 307 308 ierr = CeedFree(&impl->offsets_allocated); CeedChk(ierr); 309 ierr = CeedFree(&impl); CeedChk(ierr); 310 return 0; 311 } 312 313 //------------------------------------------------------------------------------ 314 // ElemRestriction Create 315 //------------------------------------------------------------------------------ 316 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode, 317 const CeedInt *offsets, 318 CeedElemRestriction r) { 319 int ierr; 320 CeedElemRestriction_Ref *impl; 321 CeedInt nelem, elemsize, numblk, blksize, ncomp, compstride; 322 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 323 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 324 ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 325 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 326 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 327 ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr); 328 Ceed ceed; 329 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 330 331 if (mtype != CEED_MEM_HOST) 332 // LCOV_EXCL_START 333 return CeedError(ceed, 1, "Only MemType = HOST supported"); 334 // LCOV_EXCL_STOP 335 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 336 337 // Check indices for ref or memcheck backends 338 if (offsets) { 339 Ceed parentCeed = ceed, currCeed = NULL; 340 while (parentCeed != currCeed) { 341 currCeed = parentCeed; 342 ierr = CeedGetParent(currCeed, &parentCeed); CeedChk(ierr); 343 } 344 const char *resource; 345 ierr = CeedGetResource(parentCeed, &resource); CeedChk(ierr); 346 if (!strcmp(resource, "/cpu/self/ref/serial") 347 || !strcmp(resource, "/cpu/self/ref/blocked") 348 || !strcmp(resource, "/cpu/self/memcheck/serial") 349 || !strcmp(resource, "/cpu/self/memcheck/blocked")) { 350 CeedInt lsize; 351 ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr); 352 353 for (CeedInt i = 0; i < nelem*elemsize; i++) 354 if (offsets[i] < 0 || lsize <= offsets[i] + (ncomp - 1) * compstride) 355 // LCOV_EXCL_START 356 return CeedError(ceed, 1, "Restriction offset %d (%d) out of range " 357 "[0, %d]", i, offsets[i], lsize); 358 // LCOV_EXCL_STOP 359 } 360 } 361 362 // Offsets data 363 switch (cmode) { 364 case CEED_COPY_VALUES: 365 ierr = CeedMalloc(nelem*elemsize, &impl->offsets_allocated); 366 CeedChk(ierr); 367 memcpy(impl->offsets_allocated, offsets, 368 nelem * elemsize * sizeof(offsets[0])); 369 impl->offsets = impl->offsets_allocated; 370 break; 371 case CEED_OWN_POINTER: 372 impl->offsets_allocated = (CeedInt *)offsets; 373 impl->offsets = impl->offsets_allocated; 374 break; 375 case CEED_USE_POINTER: 376 impl->offsets = offsets; 377 } 378 379 ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr); 380 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 381 CeedElemRestrictionApply_Ref); CeedChk(ierr); 382 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 383 CeedElemRestrictionApplyBlock_Ref); 384 CeedChk(ierr); 385 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", 386 CeedElemRestrictionGetOffsets_Ref); 387 CeedChk(ierr); 388 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 389 CeedElemRestrictionDestroy_Ref); CeedChk(ierr); 390 391 // Set apply function based upon ncomp, blksize, and compstride 392 CeedInt idx = -1; 393 if (blksize < 10) 394 idx = 100*ncomp + 10*blksize + (compstride == 1); 395 switch (idx) { 396 case 110: 397 impl->Apply = CeedElemRestrictionApply_Ref_110; 398 break; 399 case 111: 400 impl->Apply = CeedElemRestrictionApply_Ref_111; 401 break; 402 case 180: 403 impl->Apply = CeedElemRestrictionApply_Ref_180; 404 break; 405 case 181: 406 impl->Apply = CeedElemRestrictionApply_Ref_181; 407 break; 408 case 310: 409 impl->Apply = CeedElemRestrictionApply_Ref_310; 410 break; 411 case 311: 412 impl->Apply = CeedElemRestrictionApply_Ref_311; 413 break; 414 case 380: 415 impl->Apply = CeedElemRestrictionApply_Ref_380; 416 break; 417 case 381: 418 impl->Apply = CeedElemRestrictionApply_Ref_381; 419 break; 420 // LCOV_EXCL_START 421 case 510: 422 impl->Apply = CeedElemRestrictionApply_Ref_510; 423 break; 424 // LCOV_EXCL_STOP 425 case 511: 426 impl->Apply = CeedElemRestrictionApply_Ref_511; 427 break; 428 // LCOV_EXCL_START 429 case 580: 430 impl->Apply = CeedElemRestrictionApply_Ref_580; 431 break; 432 // LCOV_EXCL_STOP 433 case 581: 434 impl->Apply = CeedElemRestrictionApply_Ref_581; 435 break; 436 default: 437 impl->Apply = CeedElemRestrictionApply_Ref_Core; 438 break; 439 } 440 441 return 0; 442 } 443 //------------------------------------------------------------------------------ 444