1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-ref.h" 18 19 //------------------------------------------------------------------------------ 20 // Core ElemRestriction Apply Code 21 //------------------------------------------------------------------------------ 22 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 23 const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop, 24 CeedTransposeMode tmode, CeedInterlaceMode imode, CeedVector u, 25 CeedVector v, CeedRequest *request) { 26 int ierr; 27 CeedElemRestriction_Ref *impl; 28 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 29 const CeedScalar *uu; 30 CeedScalar *vv; 31 CeedInt nelem, elemsize, nnodes, voffset; 32 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 33 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 34 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 35 voffset = start*blksize*elemsize*ncomp; 36 37 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 38 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 39 // Restriction from L-vector to E-vector 40 // Perform: v = r * u 41 if (tmode == CEED_NOTRANSPOSE) { 42 // No indices provided, Identity Restriction 43 if (!impl->indices) { 44 bool backendstrides; 45 ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides); 46 CeedChk(ierr); 47 if (backendstrides) { 48 // CPU backend strides are {1, elemsize, elemsize*ncomp} 49 // This if branch is left separate to allow better inlining 50 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 51 CeedPragmaSIMD 52 for (CeedInt k = 0; k < ncomp; k++) 53 CeedPragmaSIMD 54 for (CeedInt n = 0; n < elemsize; n++) 55 CeedPragmaSIMD 56 for (CeedInt j = 0; j < blksize; j++) 57 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset] 58 = uu[n + k*elemsize + 59 CeedIntMin(e+j, nelem-1)*elemsize*ncomp]; 60 } else { 61 // User provided strides 62 CeedInt strides[3]; 63 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 64 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 65 CeedPragmaSIMD 66 for (CeedInt k = 0; k < ncomp; k++) 67 CeedPragmaSIMD 68 for (CeedInt n = 0; n < elemsize; n++) 69 CeedPragmaSIMD 70 for (CeedInt j = 0; j < blksize; j++) 71 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset] 72 = uu[n*strides[0] + k*strides[1] + 73 CeedIntMin(e+j, nelem-1)*strides[2]]; 74 } 75 } else { 76 // Indices provided, standard or blocked restriction 77 // vv has shape [elemsize, ncomp, nelem], row-major 78 // uu has shape [nnodes, ncomp] 79 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 80 CeedPragmaSIMD 81 for (CeedInt d = 0; d < ncomp; d++) 82 CeedPragmaSIMD 83 for (CeedInt i = 0; i < elemsize*blksize; i++) 84 vv[elemsize*(d*blksize+ncomp*e) + i - voffset] 85 = uu[imode == CEED_NONINTERLACED 86 ? impl->indices[i+elemsize*e]+nnodes*d 87 : d+ncomp*impl->indices[i+elemsize*e]]; 88 } 89 } else { 90 // Restriction from E-vector to L-vector 91 // Performing v += r^T * u 92 // No indices provided, Identity Restriction 93 if (!impl->indices) { 94 bool backendstrides; 95 ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides); 96 CeedChk(ierr); 97 if (backendstrides) { 98 // CPU backend strides are {1, elemsize, elemsize*ncomp} 99 // This if brach is left separate to allow better inlining 100 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 101 CeedPragmaSIMD 102 for (CeedInt k = 0; k < ncomp; k++) 103 CeedPragmaSIMD 104 for (CeedInt n = 0; n < elemsize; n++) 105 CeedPragmaSIMD 106 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 107 vv[n + k*elemsize + (e+j)*elemsize*ncomp] 108 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]; 109 } else { 110 // User provided strides 111 CeedInt strides[3]; 112 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 113 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 114 CeedPragmaSIMD 115 for (CeedInt k = 0; k < ncomp; k++) 116 CeedPragmaSIMD 117 for (CeedInt n = 0; n < elemsize; n++) 118 CeedPragmaSIMD 119 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 120 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 121 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]; 122 } 123 } else { 124 // Indices provided, standard or blocked restriction 125 // uu has shape [elemsize, ncomp, nelem] 126 // vv has shape [nnodes, ncomp] 127 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 128 for (CeedInt d = 0; d < ncomp; d++) 129 for (CeedInt i = 0; i < elemsize*blksize; i+=blksize) 130 // Iteration bound set to discard padding elements 131 for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++) 132 vv[imode == CEED_NONINTERLACED 133 ? impl->indices[j+e*elemsize]+nnodes*d 134 : d+ncomp*impl->indices[j+e*elemsize]] 135 += uu[elemsize*(d*blksize+ncomp*e) + j - voffset]; 136 } 137 } 138 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 139 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 140 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 141 *request = NULL; 142 return 0; 143 } 144 145 //------------------------------------------------------------------------------ 146 // ElemRestriction Apply - Common Sizes 147 //------------------------------------------------------------------------------ 148 static int CeedElemRestrictionApply_Ref_11(CeedElemRestriction r, 149 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 150 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 151 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, imode, 152 u, v, request); 153 } 154 155 static int CeedElemRestrictionApply_Ref_18(CeedElemRestriction r, 156 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 157 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 158 return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, imode, 159 u, v, request); 160 161 } 162 163 static int CeedElemRestrictionApply_Ref_31(CeedElemRestriction r, 164 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 165 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 166 return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, imode, 167 u, v, request); 168 } 169 170 static int CeedElemRestrictionApply_Ref_38(CeedElemRestriction r, 171 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 172 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 173 return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, imode, 174 u, v, request); 175 } 176 177 //------------------------------------------------------------------------------ 178 // ElemRestriction Apply 179 //------------------------------------------------------------------------------ 180 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 181 CeedTransposeMode tmode, CeedVector u, 182 CeedVector v, CeedRequest *request) { 183 int ierr; 184 CeedInt numblk, ncomp, blksize; 185 CeedInterlaceMode imode = CEED_NONINTERLACED; 186 ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 187 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 188 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 189 CeedElemRestriction_Ref *impl; 190 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 191 if (impl->indices) 192 ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr); 193 194 CeedInt idx = -1; 195 if (blksize < 10) 196 idx = 10*ncomp + blksize; 197 switch (idx) { 198 case 11: 199 return CeedElemRestrictionApply_Ref_11(r, 0, numblk, tmode, imode, 200 u, v, request); 201 break; 202 case 18: 203 return CeedElemRestrictionApply_Ref_18(r, 0, numblk, tmode, imode, 204 u, v, request); 205 break; 206 case 31: 207 return CeedElemRestrictionApply_Ref_31(r, 0, numblk, tmode, imode, 208 u, v, request); 209 break; 210 case 38: 211 return CeedElemRestrictionApply_Ref_38(r, 0, numblk, tmode, imode, 212 u, v, request); 213 break; 214 default: 215 // LCOV_EXCL_START 216 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk, 217 tmode, imode, u, v, request); 218 // LCOV_EXCL_STOP 219 } 220 } 221 222 //------------------------------------------------------------------------------ 223 // ElemRestriction Apply Block 224 //------------------------------------------------------------------------------ 225 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 226 CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v, 227 CeedRequest *request) { 228 int ierr; 229 CeedInt ncomp, blksize; 230 CeedInterlaceMode imode = CEED_NONINTERLACED; 231 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 232 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 233 CeedElemRestriction_Ref *impl; 234 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 235 if (impl->indices) 236 ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr); 237 238 CeedInt idx = -1; 239 if (blksize < 10) 240 idx = 10*ncomp + blksize; 241 switch (idx) { 242 case 11: 243 return CeedElemRestrictionApply_Ref_11(r, block, block+1, tmode, imode, 244 u, v, request); 245 break; 246 case 18: 247 return CeedElemRestrictionApply_Ref_18(r, block, block+1, tmode, imode, 248 u, v, request); 249 break; 250 case 31: 251 return CeedElemRestrictionApply_Ref_31(r, block, block+1, tmode, imode, 252 u, v, request); 253 break; 254 case 38: 255 return CeedElemRestrictionApply_Ref_38(r, block, block+1, tmode, imode, 256 u, v, request); 257 break; 258 default: 259 // LCOV_EXCL_START 260 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1, 261 tmode, imode, u, v, request); 262 // LCOV_EXCL_STOP 263 } 264 } 265 266 //------------------------------------------------------------------------------ 267 // ElemRestriction Destroy 268 //------------------------------------------------------------------------------ 269 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 270 int ierr; 271 CeedElemRestriction_Ref *impl; 272 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 273 274 ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 275 ierr = CeedFree(&impl); CeedChk(ierr); 276 return 0; 277 } 278 279 //------------------------------------------------------------------------------ 280 // ElemRestriction Create 281 //------------------------------------------------------------------------------ 282 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode, 283 const CeedInt *indices, CeedElemRestriction r) { 284 int ierr; 285 CeedElemRestriction_Ref *impl; 286 CeedInt elemsize, nelem; 287 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 288 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 289 Ceed ceed; 290 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 291 292 if (mtype != CEED_MEM_HOST) 293 // LCOV_EXCL_START 294 return CeedError(ceed, 1, "Only MemType = HOST supported"); 295 // LCOV_EXCL_STOP 296 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 297 switch (cmode) { 298 case CEED_COPY_VALUES: 299 ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated); 300 CeedChk(ierr); 301 memcpy(impl->indices_allocated, indices, 302 nelem * elemsize * sizeof(indices[0])); 303 impl->indices = impl->indices_allocated; 304 break; 305 case CEED_OWN_POINTER: 306 impl->indices_allocated = (CeedInt *)indices; 307 impl->indices = impl->indices_allocated; 308 break; 309 case CEED_USE_POINTER: 310 impl->indices = indices; 311 } 312 313 ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr); 314 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 315 CeedElemRestrictionApply_Ref); CeedChk(ierr); 316 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 317 CeedElemRestrictionApplyBlock_Ref); 318 CeedChk(ierr); 319 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 320 CeedElemRestrictionDestroy_Ref); CeedChk(ierr); 321 return 0; 322 } 323 //------------------------------------------------------------------------------ 324 325