1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-ref.h" 18 19 //------------------------------------------------------------------------------ 20 // Core ElemRestriction Apply Code 21 //------------------------------------------------------------------------------ 22 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 23 const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop, 24 CeedTransposeMode tmode, CeedInterlaceMode imode, CeedVector u, 25 CeedVector v, CeedRequest *request) { 26 int ierr; 27 CeedElemRestriction_Ref *impl; 28 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 29 const CeedScalar *uu; 30 CeedScalar *vv; 31 CeedInt nelem, elemsize, nnodes, voffset; 32 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 33 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 34 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 35 voffset = start*blksize*elemsize*ncomp; 36 37 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 38 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 39 // Restriction from lvector to evector 40 // Perform: v = r * u 41 if (tmode == CEED_NOTRANSPOSE) { 42 // No indices provided, Identity Restriction 43 if (!impl->indices) { 44 CeedInt strides[3]; 45 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 46 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 47 CeedPragmaSIMD 48 for (CeedInt k = 0; k < ncomp; k++) 49 CeedPragmaSIMD 50 for (CeedInt n = 0; n < elemsize; n++) 51 CeedPragmaSIMD 52 for (CeedInt j = 0; j < blksize; j++) { 53 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset] 54 = uu[n*strides[0] + k*strides[1] + 55 CeedIntMin(e+j, nelem-1)*strides[2]]; 56 } 57 } else { 58 // Indices provided, standard or blocked restriction 59 // vv has shape [elemsize, ncomp, nelem], row-major 60 // uu has shape [nnodes, ncomp] 61 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 62 CeedPragmaSIMD 63 for (CeedInt d = 0; d < ncomp; d++) 64 CeedPragmaSIMD 65 for (CeedInt i = 0; i < elemsize*blksize; i++) 66 vv[elemsize*(d*blksize+ncomp*e) + i - voffset] 67 = uu[imode == CEED_NONINTERLACED 68 ? impl->indices[i+elemsize*e]+nnodes*d 69 : d+ncomp*impl->indices[i+elemsize*e]]; 70 } 71 } else { 72 // Restriction from evector to lvector 73 // Performing v += r^T * u 74 // No indices provided, Identity Restriction 75 if (!impl->indices) { 76 CeedInt strides[3]; 77 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 78 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 79 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 80 CeedPragmaSIMD 81 for (CeedInt k = 0; k < ncomp; k++) 82 CeedPragmaSIMD 83 for (CeedInt n = 0; n < elemsize; n++) 84 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 85 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]; 86 } else { 87 // Indices provided, standard or blocked restriction 88 // uu has shape [elemsize, ncomp, nelem] 89 // vv has shape [nnodes, ncomp] 90 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 91 for (CeedInt d = 0; d < ncomp; d++) 92 for (CeedInt i = 0; i < elemsize*blksize; i+=blksize) 93 // Iteration bound set to discard padding elements 94 for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++) 95 vv[imode == CEED_NONINTERLACED 96 ? impl->indices[j+e*elemsize]+nnodes*d 97 : d+ncomp*impl->indices[j+e*elemsize]] 98 += uu[elemsize*(d*blksize+ncomp*e) + j - voffset]; 99 } 100 } 101 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 102 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 103 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 104 *request = NULL; 105 return 0; 106 } 107 108 //------------------------------------------------------------------------------ 109 // ElemRestriction Apply - Common Sizes 110 //------------------------------------------------------------------------------ 111 static int CeedElemRestrictionApply_Ref_11(CeedElemRestriction r, 112 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 113 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 114 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, imode, 115 u, v, request); 116 } 117 118 static int CeedElemRestrictionApply_Ref_18(CeedElemRestriction r, 119 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 120 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 121 return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, imode, 122 u, v, request); 123 124 } 125 126 static int CeedElemRestrictionApply_Ref_31(CeedElemRestriction r, 127 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 128 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 129 return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, imode, 130 u, v, request); 131 } 132 133 static int CeedElemRestrictionApply_Ref_38(CeedElemRestriction r, 134 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 135 CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) { 136 return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, imode, 137 u, v, request); 138 } 139 140 //------------------------------------------------------------------------------ 141 // ElemRestriction Apply 142 //------------------------------------------------------------------------------ 143 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 144 CeedTransposeMode tmode, CeedVector u, 145 CeedVector v, CeedRequest *request) { 146 int ierr; 147 CeedInt numblk, ncomp, blksize; 148 CeedInterlaceMode imode = CEED_NONINTERLACED; 149 ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 150 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 151 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 152 CeedElemRestriction_Ref *impl; 153 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 154 if (impl->indices) 155 ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr); 156 157 CeedInt idx = -1; 158 if (blksize < 10) 159 idx = 10*ncomp + blksize; 160 switch (idx) { 161 case 11: 162 return CeedElemRestrictionApply_Ref_11(r, 0, numblk, tmode, imode, 163 u, v, request); 164 break; 165 case 18: 166 return CeedElemRestrictionApply_Ref_18(r, 0, numblk, tmode, imode, 167 u, v, request); 168 break; 169 case 31: 170 return CeedElemRestrictionApply_Ref_31(r, 0, numblk, tmode, imode, 171 u, v, request); 172 break; 173 case 38: 174 return CeedElemRestrictionApply_Ref_38(r, 0, numblk, tmode, imode, 175 u, v, request); 176 break; 177 default: 178 // LCOV_EXCL_START 179 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk, 180 tmode, imode, u, v, request); 181 // LCOV_EXCL_STOP 182 } 183 } 184 185 //------------------------------------------------------------------------------ 186 // ElemRestriction Apply Block 187 //------------------------------------------------------------------------------ 188 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 189 CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v, 190 CeedRequest *request) { 191 int ierr; 192 CeedInt ncomp, blksize; 193 CeedInterlaceMode imode = CEED_NONINTERLACED; 194 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 195 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 196 CeedElemRestriction_Ref *impl; 197 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 198 if (impl->indices) 199 ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr); 200 201 CeedInt idx = -1; 202 if (blksize < 10) 203 idx = 10*ncomp + blksize; 204 switch (idx) { 205 case 11: 206 return CeedElemRestrictionApply_Ref_11(r, block, block+1, tmode, imode, 207 u, v, request); 208 break; 209 case 18: 210 return CeedElemRestrictionApply_Ref_18(r, block, block+1, tmode, imode, 211 u, v, request); 212 break; 213 case 31: 214 return CeedElemRestrictionApply_Ref_31(r, block, block+1, tmode, imode, 215 u, v, request); 216 break; 217 case 38: 218 return CeedElemRestrictionApply_Ref_38(r, block, block+1, tmode, imode, 219 u, v, request); 220 break; 221 default: 222 // LCOV_EXCL_START 223 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1, 224 tmode, imode, u, v, request); 225 // LCOV_EXCL_STOP 226 } 227 } 228 229 //------------------------------------------------------------------------------ 230 // ElemRestriction Destroy 231 //------------------------------------------------------------------------------ 232 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 233 int ierr; 234 CeedElemRestriction_Ref *impl; 235 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 236 237 ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 238 ierr = CeedFree(&impl); CeedChk(ierr); 239 return 0; 240 } 241 242 //------------------------------------------------------------------------------ 243 // ElemRestriction Create 244 //------------------------------------------------------------------------------ 245 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode, 246 const CeedInt *indices, CeedElemRestriction r) { 247 int ierr; 248 CeedElemRestriction_Ref *impl; 249 CeedInt elemsize, nelem; 250 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 251 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 252 Ceed ceed; 253 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 254 255 if (mtype != CEED_MEM_HOST) 256 // LCOV_EXCL_START 257 return CeedError(ceed, 1, "Only MemType = HOST supported"); 258 // LCOV_EXCL_STOP 259 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 260 switch (cmode) { 261 case CEED_COPY_VALUES: 262 ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated); 263 CeedChk(ierr); 264 memcpy(impl->indices_allocated, indices, 265 nelem * elemsize * sizeof(indices[0])); 266 impl->indices = impl->indices_allocated; 267 break; 268 case CEED_OWN_POINTER: 269 impl->indices_allocated = (CeedInt *)indices; 270 impl->indices = impl->indices_allocated; 271 break; 272 case CEED_USE_POINTER: 273 impl->indices = indices; 274 } 275 276 ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr); 277 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 278 CeedElemRestrictionApply_Ref); CeedChk(ierr); 279 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 280 CeedElemRestrictionApplyBlock_Ref); 281 CeedChk(ierr); 282 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 283 CeedElemRestrictionDestroy_Ref); CeedChk(ierr); 284 return 0; 285 } 286 //------------------------------------------------------------------------------ 287