1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <string.h> 21 #include "ceed-ref.h" 22 23 //------------------------------------------------------------------------------ 24 // Core ElemRestriction Apply Code 25 //------------------------------------------------------------------------------ 26 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 27 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 28 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 29 CeedVector v, CeedRequest *request) { 30 int ierr; 31 CeedElemRestriction_Ref *impl; 32 ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 33 const CeedScalar *uu; 34 CeedScalar *vv; 35 CeedInt num_elem, elem_size, v_offset; 36 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 37 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 38 v_offset = start*blk_size*elem_size*num_comp; 39 40 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr); 41 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 42 // Restriction from L-vector to E-vector 43 // Perform: v = r * u 44 if (t_mode == CEED_NOTRANSPOSE) { 45 // No offsets provided, Identity Restriction 46 if (!impl->offsets) { 47 bool has_backend_strides; 48 ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 49 CeedChkBackend(ierr); 50 if (has_backend_strides) { 51 // CPU backend strides are {1, elem_size, elem_size*num_comp} 52 // This if branch is left separate to allow better inlining 53 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 54 CeedPragmaSIMD 55 for (CeedInt k = 0; k < num_comp; k++) 56 CeedPragmaSIMD 57 for (CeedInt n = 0; n < elem_size; n++) 58 CeedPragmaSIMD 59 for (CeedInt j = 0; j < blk_size; j++) 60 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 61 = uu[n + k*elem_size + 62 CeedIntMin(e+j, num_elem-1)*elem_size*num_comp]; 63 } else { 64 // User provided strides 65 CeedInt strides[3]; 66 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 67 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 68 CeedPragmaSIMD 69 for (CeedInt k = 0; k < num_comp; k++) 70 CeedPragmaSIMD 71 for (CeedInt n = 0; n < elem_size; n++) 72 CeedPragmaSIMD 73 for (CeedInt j = 0; j < blk_size; j++) 74 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 75 = uu[n*strides[0] + k*strides[1] + 76 CeedIntMin(e+j, num_elem-1)*strides[2]]; 77 } 78 } else { 79 // Offsets provided, standard or blocked restriction 80 // vv has shape [elem_size, num_comp, num_elem], row-major 81 // uu has shape [nnodes, num_comp] 82 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 83 CeedPragmaSIMD 84 for (CeedInt k = 0; k < num_comp; k++) 85 CeedPragmaSIMD 86 for (CeedInt i = 0; i < elem_size*blk_size; i++) 87 vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] 88 = uu[impl->offsets[i+elem_size*e] + k*comp_stride]; 89 } 90 } else { 91 // Restriction from E-vector to L-vector 92 // Performing v += r^T * u 93 // No offsets provided, Identity Restriction 94 if (!impl->offsets) { 95 bool has_backend_strides; 96 ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 97 CeedChkBackend(ierr); 98 if (has_backend_strides) { 99 // CPU backend strides are {1, elem_size, elem_size*num_comp} 100 // This if brach is left separate to allow better inlining 101 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 102 CeedPragmaSIMD 103 for (CeedInt k = 0; k < num_comp; k++) 104 CeedPragmaSIMD 105 for (CeedInt n = 0; n < elem_size; n++) 106 CeedPragmaSIMD 107 for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 108 vv[n + k*elem_size + (e+j)*elem_size*num_comp] 109 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 110 } else { 111 // User provided strides 112 CeedInt strides[3]; 113 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 114 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 115 CeedPragmaSIMD 116 for (CeedInt k = 0; k < num_comp; k++) 117 CeedPragmaSIMD 118 for (CeedInt n = 0; n < elem_size; n++) 119 CeedPragmaSIMD 120 for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 121 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 122 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 123 } 124 } else { 125 // Offsets provided, standard or blocked restriction 126 // uu has shape [elem_size, num_comp, num_elem] 127 // vv has shape [nnodes, num_comp] 128 for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 129 for (CeedInt k = 0; k < num_comp; k++) 130 for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) 131 // Iteration bound set to discard padding elements 132 for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) 133 vv[impl->offsets[j+e*elem_size] + k*comp_stride] 134 += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset]; 135 } 136 } 137 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr); 138 ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr); 139 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 140 *request = NULL; 141 return CEED_ERROR_SUCCESS; 142 } 143 144 //------------------------------------------------------------------------------ 145 // ElemRestriction Apply - Common Sizes 146 //------------------------------------------------------------------------------ 147 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, 148 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 149 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 150 CeedVector v, CeedRequest *request) { 151 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, 152 t_mode, u, v, request); 153 } 154 155 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, 156 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 157 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 158 CeedVector v, CeedRequest *request) { 159 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, 160 u, v, request); 161 } 162 163 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, 164 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 165 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 166 CeedVector v, CeedRequest *request) { 167 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, 168 t_mode, u, v, request); 169 } 170 171 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, 172 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 173 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 174 CeedVector v, CeedRequest *request) { 175 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, 176 u, v, request); 177 } 178 179 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, 180 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 181 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 182 CeedVector v, CeedRequest *request) { 183 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, 184 t_mode, u, v, request); 185 } 186 187 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, 188 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 189 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 190 CeedVector v, CeedRequest *request) { 191 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, 192 u, v, request); 193 } 194 195 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, 196 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 197 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 198 CeedVector v, CeedRequest *request) { 199 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, 200 t_mode, u, v, request); 201 } 202 203 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, 204 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 205 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 206 CeedVector v, CeedRequest *request) { 207 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, 208 u, v, request); 209 } 210 211 // LCOV_EXCL_START 212 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, 213 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 214 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 215 CeedVector v, CeedRequest *request) { 216 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, 217 t_mode, u, v, request); 218 } 219 // LCOV_EXCL_STOP 220 221 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, 222 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 223 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 224 CeedVector v, CeedRequest *request) { 225 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, 226 u, v, request); 227 } 228 229 // LCOV_EXCL_START 230 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, 231 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 232 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 233 CeedVector v, CeedRequest *request) { 234 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, 235 t_mode, u, v, request); 236 } 237 // LCOV_EXCL_STOP 238 239 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, 240 const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 241 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 242 CeedVector v, CeedRequest *request) { 243 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, 244 u, v, request); 245 } 246 247 //------------------------------------------------------------------------------ 248 // ElemRestriction Apply 249 //------------------------------------------------------------------------------ 250 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 251 CeedTransposeMode t_mode, CeedVector u, 252 CeedVector v, CeedRequest *request) { 253 int ierr; 254 CeedInt num_blk, blk_size, num_comp, comp_stride; 255 ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 256 ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 257 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 258 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 259 CeedElemRestriction_Ref *impl; 260 ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 261 262 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, 263 request); 264 } 265 266 //------------------------------------------------------------------------------ 267 // ElemRestriction Apply Block 268 //------------------------------------------------------------------------------ 269 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 270 CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 271 CeedRequest *request) { 272 int ierr; 273 CeedInt blk_size, num_comp, comp_stride; 274 ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 275 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 276 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 277 CeedElemRestriction_Ref *impl; 278 ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 279 280 return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode, 281 u, v, request); 282 } 283 284 //------------------------------------------------------------------------------ 285 // ElemRestriction Get Offsets 286 //------------------------------------------------------------------------------ 287 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, 288 CeedMemType mem_type, const CeedInt **offsets) { 289 int ierr; 290 CeedElemRestriction_Ref *impl; 291 ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr); 292 Ceed ceed; 293 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr); 294 295 if (mem_type != CEED_MEM_HOST) 296 // LCOV_EXCL_START 297 return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 298 // LCOV_EXCL_STOP 299 300 *offsets = impl->offsets; 301 return CEED_ERROR_SUCCESS; 302 } 303 304 //------------------------------------------------------------------------------ 305 // ElemRestriction Destroy 306 //------------------------------------------------------------------------------ 307 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 308 int ierr; 309 CeedElemRestriction_Ref *impl; 310 ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 311 312 ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr); 313 ierr = CeedFree(&impl); CeedChkBackend(ierr); 314 return CEED_ERROR_SUCCESS; 315 } 316 317 //------------------------------------------------------------------------------ 318 // ElemRestriction Create 319 //------------------------------------------------------------------------------ 320 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, 321 const CeedInt *offsets, 322 CeedElemRestriction r) { 323 int ierr; 324 CeedElemRestriction_Ref *impl; 325 CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 326 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 327 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 328 ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 329 ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 330 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 331 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 332 Ceed ceed; 333 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 334 335 if (mem_type != CEED_MEM_HOST) 336 // LCOV_EXCL_START 337 return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 338 // LCOV_EXCL_STOP 339 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 340 341 // Offsets data 342 bool is_strided; 343 ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr); 344 if (!is_strided) { 345 // Check indices for ref or memcheck backends 346 Ceed parent_ceed = ceed, curr_ceed = NULL; 347 while (parent_ceed != curr_ceed) { 348 curr_ceed = parent_ceed; 349 ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr); 350 } 351 const char *resource; 352 ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr); 353 if (!strcmp(resource, "/cpu/self/ref/serial") || 354 !strcmp(resource, "/cpu/self/ref/blocked") || 355 !strcmp(resource, "/cpu/self/memcheck/serial") || 356 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 357 CeedInt l_size; 358 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 359 360 for (CeedInt i = 0; i < num_elem*elem_size; i++) 361 if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) 362 // LCOV_EXCL_START 363 return CeedError(ceed, CEED_ERROR_BACKEND, 364 "Restriction offset %d (%d) out of range " 365 "[0, %d]", i, offsets[i], l_size); 366 // LCOV_EXCL_STOP 367 } 368 369 // Copy data 370 switch (copy_mode) { 371 case CEED_COPY_VALUES: 372 ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated); 373 CeedChkBackend(ierr); 374 memcpy(impl->offsets_allocated, offsets, 375 num_elem * elem_size * sizeof(offsets[0])); 376 impl->offsets = impl->offsets_allocated; 377 break; 378 case CEED_OWN_POINTER: 379 impl->offsets_allocated = (CeedInt *)offsets; 380 impl->offsets = impl->offsets_allocated; 381 break; 382 case CEED_USE_POINTER: 383 impl->offsets = offsets; 384 } 385 } 386 387 ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr); 388 CeedInt layout[3] = {1, elem_size, elem_size*num_comp}; 389 ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr); 390 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 391 CeedElemRestrictionApply_Ref); CeedChkBackend(ierr); 392 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 393 CeedElemRestrictionApplyBlock_Ref); 394 CeedChkBackend(ierr); 395 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", 396 CeedElemRestrictionGetOffsets_Ref); 397 CeedChkBackend(ierr); 398 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 399 CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr); 400 401 // Set apply function based upon num_comp, blk_size, and comp_stride 402 CeedInt idx = -1; 403 if (blk_size < 10) 404 idx = 100*num_comp + 10*blk_size + (comp_stride == 1); 405 switch (idx) { 406 case 110: 407 impl->Apply = CeedElemRestrictionApply_Ref_110; 408 break; 409 case 111: 410 impl->Apply = CeedElemRestrictionApply_Ref_111; 411 break; 412 case 180: 413 impl->Apply = CeedElemRestrictionApply_Ref_180; 414 break; 415 case 181: 416 impl->Apply = CeedElemRestrictionApply_Ref_181; 417 break; 418 case 310: 419 impl->Apply = CeedElemRestrictionApply_Ref_310; 420 break; 421 case 311: 422 impl->Apply = CeedElemRestrictionApply_Ref_311; 423 break; 424 case 380: 425 impl->Apply = CeedElemRestrictionApply_Ref_380; 426 break; 427 case 381: 428 impl->Apply = CeedElemRestrictionApply_Ref_381; 429 break; 430 // LCOV_EXCL_START 431 case 510: 432 impl->Apply = CeedElemRestrictionApply_Ref_510; 433 break; 434 // LCOV_EXCL_STOP 435 case 511: 436 impl->Apply = CeedElemRestrictionApply_Ref_511; 437 break; 438 // LCOV_EXCL_START 439 case 580: 440 impl->Apply = CeedElemRestrictionApply_Ref_580; 441 break; 442 // LCOV_EXCL_STOP 443 case 581: 444 impl->Apply = CeedElemRestrictionApply_Ref_581; 445 break; 446 default: 447 impl->Apply = CeedElemRestrictionApply_Ref_Core; 448 break; 449 } 450 451 return CEED_ERROR_SUCCESS; 452 } 453 //------------------------------------------------------------------------------ 454