1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-ref.h" 18 19 //------------------------------------------------------------------------------ 20 // Core ElemRestriction Apply Code 21 //------------------------------------------------------------------------------ 22 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 23 const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop, 24 CeedTransposeMode tmode, CeedInterlaceMode imode, CeedVector u, 25 CeedVector v, CeedRequest *request) { 26 int ierr; 27 CeedElemRestriction_Ref *impl; 28 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 29 const CeedScalar *uu; 30 CeedScalar *vv; 31 CeedInt nelem, elemsize, nnodes, voffset; 32 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 33 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 34 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 35 voffset = start*blksize*elemsize*ncomp; 36 37 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 38 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 39 // Restriction from L-vector to E-vector 40 // Perform: v = r * u 41 if (tmode == CEED_NOTRANSPOSE) { 42 // No indices provided, Identity Restriction 43 if (!impl->indices) { 44 bool backendstrides; 45 ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides); 46 CeedChk(ierr); 47 if (backendstrides) { 48 // CPU backend strides are {1, elemsize, elemsize*ncomp} 49 // This if branch is left separate to allow better inlining 50 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 51 CeedPragmaSIMD 52 for (CeedInt k = 0; k < ncomp; k++) 53 CeedPragmaSIMD 54 for (CeedInt n = 0; n < elemsize; n++) 55 CeedPragmaSIMD 56 for (CeedInt j = 0; j < blksize; j++) 57 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset] 58 = uu[n + k*elemsize + 59 CeedIntMin(e+j, nelem-1)*elemsize*ncomp]; 60 } else { 61 // User provided strides 62 CeedInt strides[3]; 63 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 64 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 65 CeedPragmaSIMD 66 for (CeedInt k = 0; k < ncomp; k++) 67 CeedPragmaSIMD 68 for (CeedInt n = 0; n < elemsize; n++) 69 CeedPragmaSIMD 70 for (CeedInt j = 0; j < blksize; j++) 71 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset] 72 = uu[n*strides[0] + k*strides[1] + 73 CeedIntMin(e+j, nelem-1)*strides[2]]; 74 } 75 } else { 76 // Indices provided, standard or blocked restriction 77 // vv has shape [elemsize, ncomp, nelem], row-major 78 // uu has shape [nnodes, ncomp] 79 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 80 CeedPragmaSIMD 81 for (CeedInt d = 0; d < ncomp; d++) 82 CeedPragmaSIMD 83 for (CeedInt i = 0; i < elemsize*blksize; i++) 84 vv[elemsize*(d*blksize+ncomp*e) + i - voffset] 85 = uu[imode == CEED_NONINTERLACED 86 ? impl->indices[i+elemsize*e]+nnodes*d 87 : d+ncomp*impl->indices[i+elemsize*e]]; 88 } 89 } else { 90 // Restriction from E-vector to L-vector 91 // Performing v += r^T * u 92 // No indices provided, Identity Restriction 93 if (!impl->indices) { 94 bool backendstrides; 95 ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides); 96 CeedChk(ierr); 97 if (backendstrides) { 98 // CPU backend strides are {1, elemsize, elemsize*ncomp} 99 // This if brach is left separate to allow better inlining 100 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 101 CeedPragmaSIMD 102 for (CeedInt k = 0; k < ncomp; k++) 103 CeedPragmaSIMD 104 for (CeedInt n = 0; n < elemsize; n++) 105 CeedPragmaSIMD 106 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 107 vv[n + k*elemsize + (e+j)*elemsize*ncomp] 108 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]; 109 } else { 110 // User provided strides 111 CeedInt strides[3]; 112 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 113 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 114 CeedPragmaSIMD 115 for (CeedInt k = 0; k < ncomp; k++) 116 CeedPragmaSIMD 117 for (CeedInt n = 0; n < elemsize; n++) 118 CeedPragmaSIMD 119 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 120 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 121 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]; 122 } 123 } else { 124 // Indices provided, standard or blocked restriction 125 // uu has shape [elemsize, ncomp, nelem] 126 // vv has shape [nnodes, ncomp] 127 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 128 for (CeedInt d = 0; d < ncomp; d++) 129 for (CeedInt i = 0; i < elemsize*blksize; i+=blksize) 130 // Iteration bound set to discard padding elements 131 for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++) 132 vv[imode == CEED_NONINTERLACED 133 ? impl->indices[j+e*elemsize]+nnodes*d 134 : d+ncomp*impl->indices[j+e*elemsize]] 135 += uu[elemsize*(d*blksize+ncomp*e) + j - voffset]; 136 } 137 } 138 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 139 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 140 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 141 *request = NULL; 142 return 0; 143 } 144 145 //------------------------------------------------------------------------------ 146 // ElemRestriction Apply - Common Sizes 147 //------------------------------------------------------------------------------ 148 static int CeedElemRestrictionApply_Ref_11(CeedElemRestriction r, 149 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 150 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 151 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, imode, 152 u, v, request); 153 } 154 155 static int CeedElemRestrictionApply_Ref_18(CeedElemRestriction r, 156 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 157 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 158 return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, imode, 159 u, v, request); 160 161 } 162 163 static int CeedElemRestrictionApply_Ref_31(CeedElemRestriction r, 164 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 165 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 166 return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, imode, 167 u, v, request); 168 } 169 170 static int CeedElemRestrictionApply_Ref_38(CeedElemRestriction r, 171 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 172 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 173 return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, imode, 174 u, v, request); 175 } 176 177 //------------------------------------------------------------------------------ 178 // ElemRestriction Apply 179 //------------------------------------------------------------------------------ 180 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 181 CeedTransposeMode tmode, CeedVector u, 182 CeedVector v, CeedRequest *request) { 183 int ierr; 184 CeedInt numblk, ncomp, blksize; 185 CeedInterlaceMode imode = CEED_NONINTERLACED; 186 ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 187 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 188 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 189 CeedElemRestriction_Ref *impl; 190 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 191 if (impl->indices) 192 ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr); 193 194 CeedInt idx = -1; 195 if (blksize < 10) 196 idx = 10*ncomp + blksize; 197 switch (idx) { 198 case 11: 199 return CeedElemRestrictionApply_Ref_11(r, 0, numblk, tmode, imode, 200 u, v, request); 201 case 18: 202 return CeedElemRestrictionApply_Ref_18(r, 0, numblk, tmode, imode, 203 u, v, request); 204 case 31: 205 return CeedElemRestrictionApply_Ref_31(r, 0, numblk, tmode, imode, 206 u, v, request); 207 case 38: 208 return CeedElemRestrictionApply_Ref_38(r, 0, numblk, tmode, imode, 209 u, v, request); 210 default: 211 // LCOV_EXCL_START 212 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk, 213 tmode, imode, u, v, request); 214 // LCOV_EXCL_STOP 215 } 216 } 217 218 //------------------------------------------------------------------------------ 219 // ElemRestriction Apply Block 220 //------------------------------------------------------------------------------ 221 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 222 CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v, 223 CeedRequest *request) { 224 int ierr; 225 CeedInt ncomp, blksize; 226 CeedInterlaceMode imode = CEED_NONINTERLACED; 227 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 228 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 229 CeedElemRestriction_Ref *impl; 230 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 231 if (impl->indices) 232 ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr); 233 234 CeedInt idx = -1; 235 if (blksize < 10) 236 idx = 10*ncomp + blksize; 237 switch (idx) { 238 case 11: 239 return CeedElemRestrictionApply_Ref_11(r, block, block+1, tmode, imode, 240 u, v, request); 241 case 18: 242 return CeedElemRestrictionApply_Ref_18(r, block, block+1, tmode, imode, 243 u, v, request); 244 case 31: 245 return CeedElemRestrictionApply_Ref_31(r, block, block+1, tmode, imode, 246 u, v, request); 247 case 38: 248 return CeedElemRestrictionApply_Ref_38(r, block, block+1, tmode, imode, 249 u, v, request); 250 default: 251 // LCOV_EXCL_START 252 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1, 253 tmode, imode, u, v, request); 254 // LCOV_EXCL_STOP 255 } 256 } 257 258 //------------------------------------------------------------------------------ 259 // ElemRestriction Destroy 260 //------------------------------------------------------------------------------ 261 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 262 int ierr; 263 CeedElemRestriction_Ref *impl; 264 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 265 266 ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 267 ierr = CeedFree(&impl); CeedChk(ierr); 268 return 0; 269 } 270 271 //------------------------------------------------------------------------------ 272 // ElemRestriction Create 273 //------------------------------------------------------------------------------ 274 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode, 275 const CeedInt *indices, CeedElemRestriction r) { 276 int ierr; 277 CeedElemRestriction_Ref *impl; 278 CeedInt elemsize, nelem; 279 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 280 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 281 Ceed ceed; 282 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 283 284 if (mtype != CEED_MEM_HOST) 285 // LCOV_EXCL_START 286 return CeedError(ceed, 1, "Only MemType = HOST supported"); 287 // LCOV_EXCL_STOP 288 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 289 switch (cmode) { 290 case CEED_COPY_VALUES: 291 ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated); 292 CeedChk(ierr); 293 memcpy(impl->indices_allocated, indices, 294 nelem * elemsize * sizeof(indices[0])); 295 impl->indices = impl->indices_allocated; 296 break; 297 case CEED_OWN_POINTER: 298 impl->indices_allocated = (CeedInt *)indices; 299 impl->indices = impl->indices_allocated; 300 break; 301 case CEED_USE_POINTER: 302 impl->indices = indices; 303 } 304 305 ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr); 306 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 307 CeedElemRestrictionApply_Ref); CeedChk(ierr); 308 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 309 CeedElemRestrictionApplyBlock_Ref); 310 CeedChk(ierr); 311 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 312 CeedElemRestrictionDestroy_Ref); CeedChk(ierr); 313 return 0; 314 } 315 //------------------------------------------------------------------------------ 316 317