1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-ref.h" 18 19 //------------------------------------------------------------------------------ 20 // Core ElemRestriction Apply Code 21 //------------------------------------------------------------------------------ 22 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 23 const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop, 24 CeedTransposeMode tmode, CeedInterlaceMode imode, CeedVector u, 25 CeedVector v, CeedRequest *request) { 26 int ierr; 27 CeedElemRestriction_Ref *impl; 28 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 29 const CeedScalar *uu; 30 CeedScalar *vv; 31 CeedInt nelem, elemsize, nnodes, voffset; 32 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 33 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 34 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 35 voffset = start*blksize*elemsize*ncomp; 36 37 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 38 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 39 // Restriction from lvector to evector 40 // Perform: v = r * u 41 if (tmode == CEED_NOTRANSPOSE) { 42 // No indices provided, Identity Restriction 43 if (!impl->indices) { 44 CeedInt strides[3]; 45 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 46 bool backendstrides; 47 ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides); 48 CeedChk(ierr); 49 if (backendstrides) { 50 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 51 CeedPragmaSIMD 52 for (CeedInt k = 0; k < ncomp*elemsize; k++) 53 CeedPragmaSIMD 54 for (CeedInt j = 0; j < blksize; j++) { 55 vv[e*elemsize*ncomp + k*blksize + j - voffset] = 56 uu[e*elemsize*ncomp + k*CeedIntMin(blksize, nelem-e) + 57 CeedIntMin(j, nelem-e)]; 58 } 59 } else { 60 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 61 CeedPragmaSIMD 62 for (CeedInt k = 0; k < ncomp; k++) 63 CeedPragmaSIMD 64 for (CeedInt n = 0; n < elemsize; n++) 65 CeedPragmaSIMD 66 for (CeedInt j = 0; j < blksize; j++) { 67 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset] 68 = uu[n*strides[0] + k*strides[1] + 69 CeedIntMin(e+j, nelem-1)*strides[2]]; 70 } 71 } 72 } else { 73 // Indices provided, standard or blocked restriction 74 // vv has shape [elemsize, ncomp, nelem], row-major 75 // uu has shape [nnodes, ncomp] 76 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 77 CeedPragmaSIMD 78 for (CeedInt d = 0; d < ncomp; d++) 79 CeedPragmaSIMD 80 for (CeedInt i = 0; i < elemsize*blksize; i++) 81 vv[elemsize*(d*blksize+ncomp*e) + i - voffset] 82 = uu[imode == CEED_NONINTERLACED 83 ? impl->indices[i+elemsize*e]+nnodes*d 84 : d+ncomp*impl->indices[i+elemsize*e]]; 85 } 86 } else { 87 // Restriction from evector to lvector 88 // Performing v += r^T * u 89 // No indices provided, Identity Restriction 90 if (!impl->indices) { 91 CeedInt strides[3]; 92 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 93 bool backendstrides; 94 ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides); 95 CeedChk(ierr); 96 if (backendstrides) { 97 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 98 CeedPragmaSIMD 99 for (CeedInt k = 0; k < ncomp*elemsize; k++) 100 CeedPragmaSIMD 101 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 102 vv[e*elemsize*ncomp + k*CeedIntMin(blksize, nelem-e) + j] 103 += uu[e*elemsize*ncomp + k*blksize + j - voffset]; 104 } else { 105 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 106 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 107 CeedPragmaSIMD 108 for (CeedInt k = 0; k < ncomp; k++) 109 CeedPragmaSIMD 110 for (CeedInt n = 0; n < elemsize; n++) 111 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 112 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]; 113 } 114 } else { 115 // Indices provided, standard or blocked restriction 116 // uu has shape [elemsize, ncomp, nelem] 117 // vv has shape [nnodes, ncomp] 118 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 119 for (CeedInt d = 0; d < ncomp; d++) 120 for (CeedInt i = 0; i < elemsize*blksize; i+=blksize) 121 // Iteration bound set to discard padding elements 122 for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++) 123 vv[imode == CEED_NONINTERLACED 124 ? impl->indices[j+e*elemsize]+nnodes*d 125 : d+ncomp*impl->indices[j+e*elemsize]] 126 += uu[elemsize*(d*blksize+ncomp*e) + j - voffset]; 127 } 128 } 129 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 130 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 131 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 132 *request = NULL; 133 return 0; 134 } 135 136 //------------------------------------------------------------------------------ 137 // ElemRestriction Apply - Common Sizes 138 //------------------------------------------------------------------------------ 139 static int CeedElemRestrictionApply_Ref_11(CeedElemRestriction r, 140 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 141 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 142 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, imode, 143 u, v, request); 144 } 145 146 static int CeedElemRestrictionApply_Ref_18(CeedElemRestriction r, 147 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 148 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 149 return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, imode, 150 u, v, request); 151 152 } 153 154 static int CeedElemRestrictionApply_Ref_31(CeedElemRestriction r, 155 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 156 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 157 return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, imode, 158 u, v, request); 159 } 160 161 static int CeedElemRestrictionApply_Ref_38(CeedElemRestriction r, 162 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 163 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 164 return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, imode, 165 u, v, request); 166 } 167 168 //------------------------------------------------------------------------------ 169 // ElemRestriction Apply 170 //------------------------------------------------------------------------------ 171 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 172 CeedTransposeMode tmode, CeedVector u, 173 CeedVector v, CeedRequest *request) { 174 int ierr; 175 CeedInt numblk, ncomp, blksize; 176 CeedInterlaceMode imode = CEED_NONINTERLACED; 177 ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 178 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 179 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 180 CeedElemRestriction_Ref *impl; 181 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 182 if (impl->indices) 183 ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr); 184 185 CeedInt idx = -1; 186 if (blksize < 10) 187 idx = 10*ncomp + blksize; 188 switch (idx) { 189 case 11: 190 return CeedElemRestrictionApply_Ref_11(r, 0, numblk, tmode, imode, 191 u, v, request); 192 break; 193 case 18: 194 return CeedElemRestrictionApply_Ref_18(r, 0, numblk, tmode, imode, 195 u, v, request); 196 break; 197 case 31: 198 return CeedElemRestrictionApply_Ref_31(r, 0, numblk, tmode, imode, 199 u, v, request); 200 break; 201 case 38: 202 return CeedElemRestrictionApply_Ref_38(r, 0, numblk, tmode, imode, 203 u, v, request); 204 break; 205 default: 206 // LCOV_EXCL_START 207 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk, 208 tmode, imode, u, v, request); 209 // LCOV_EXCL_STOP 210 } 211 } 212 213 //------------------------------------------------------------------------------ 214 // ElemRestriction Apply Block 215 //------------------------------------------------------------------------------ 216 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 217 CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v, 218 CeedRequest *request) { 219 int ierr; 220 CeedInt ncomp, blksize; 221 CeedInterlaceMode imode = CEED_NONINTERLACED; 222 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 223 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 224 CeedElemRestriction_Ref *impl; 225 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 226 if (impl->indices) 227 ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr); 228 229 CeedInt idx = -1; 230 if (blksize < 10) 231 idx = 10*ncomp + blksize; 232 switch (idx) { 233 case 11: 234 return CeedElemRestrictionApply_Ref_11(r, block, block+1, tmode, imode, 235 u, v, request); 236 break; 237 case 18: 238 return CeedElemRestrictionApply_Ref_18(r, block, block+1, tmode, imode, 239 u, v, request); 240 break; 241 case 31: 242 return CeedElemRestrictionApply_Ref_31(r, block, block+1, tmode, imode, 243 u, v, request); 244 break; 245 case 38: 246 return CeedElemRestrictionApply_Ref_38(r, block, block+1, tmode, imode, 247 u, v, request); 248 break; 249 default: 250 // LCOV_EXCL_START 251 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1, 252 tmode, imode, u, v, request); 253 // LCOV_EXCL_STOP 254 } 255 } 256 257 //------------------------------------------------------------------------------ 258 // ElemRestriction Destroy 259 //------------------------------------------------------------------------------ 260 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 261 int ierr; 262 CeedElemRestriction_Ref *impl; 263 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 264 265 ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 266 ierr = CeedFree(&impl); CeedChk(ierr); 267 return 0; 268 } 269 270 //------------------------------------------------------------------------------ 271 // ElemRestriction Create 272 //------------------------------------------------------------------------------ 273 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode, 274 const CeedInt *indices, CeedElemRestriction r) { 275 int ierr; 276 CeedElemRestriction_Ref *impl; 277 CeedInt elemsize, nelem; 278 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 279 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 280 Ceed ceed; 281 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 282 283 if (mtype != CEED_MEM_HOST) 284 // LCOV_EXCL_START 285 return CeedError(ceed, 1, "Only MemType = HOST supported"); 286 // LCOV_EXCL_STOP 287 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 288 switch (cmode) { 289 case CEED_COPY_VALUES: 290 ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated); 291 CeedChk(ierr); 292 memcpy(impl->indices_allocated, indices, 293 nelem * elemsize * sizeof(indices[0])); 294 impl->indices = impl->indices_allocated; 295 break; 296 case CEED_OWN_POINTER: 297 impl->indices_allocated = (CeedInt *)indices; 298 impl->indices = impl->indices_allocated; 299 break; 300 case CEED_USE_POINTER: 301 impl->indices = indices; 302 } 303 304 ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr); 305 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 306 CeedElemRestrictionApply_Ref); CeedChk(ierr); 307 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 308 CeedElemRestrictionApplyBlock_Ref); 309 CeedChk(ierr); 310 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 311 CeedElemRestrictionDestroy_Ref); CeedChk(ierr); 312 return 0; 313 } 314 //------------------------------------------------------------------------------ 315