1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-ref.h" 18 19 //------------------------------------------------------------------------------ 20 // Core ElemRestriction Apply Code 21 //------------------------------------------------------------------------------ 22 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 23 const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop, 24 CeedTransposeMode tmode, CeedTransposeMode lmode, CeedVector u, 25 CeedVector v, CeedRequest *request) { 26 int ierr; 27 CeedElemRestriction_Ref *impl; 28 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);; 29 const CeedScalar *uu; 30 CeedScalar *vv; 31 CeedInt nelem, elemsize, nnodes, voffset; 32 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 33 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 34 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 35 voffset = start*blksize*elemsize*ncomp; 36 37 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 38 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 39 // Restriction from lvector to evector 40 // Perform: v = r * u 41 if (tmode == CEED_NOTRANSPOSE) { 42 // No indices provided, Identity Restriction 43 if (!impl->indices) { 44 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 45 CeedPragmaSIMD 46 for (CeedInt k = 0; k < ncomp*elemsize; k++) 47 CeedPragmaSIMD 48 for (CeedInt j = 0; j < blksize; j++) 49 vv[e*elemsize*ncomp + k*blksize + j - voffset] 50 = uu[CeedIntMin(e+j, nelem-1)*ncomp*elemsize + k]; 51 } else { 52 // Indices provided, standard or blocked restriction 53 // vv has shape [elemsize, ncomp, nelem], row-major 54 // uu has shape [nnodes, ncomp] 55 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 56 CeedPragmaSIMD 57 for (CeedInt d = 0; d < ncomp; d++) 58 CeedPragmaSIMD 59 for (CeedInt i = 0; i < elemsize*blksize; i++) 60 vv[elemsize*(d*blksize+ncomp*e) + i - voffset] 61 = uu[lmode == CEED_NOTRANSPOSE 62 ? impl->indices[i+elemsize*e]+nnodes*d 63 : d+ncomp*impl->indices[i+elemsize*e]]; 64 } 65 } else { 66 // Restriction from evector to lvector 67 // Performing v += r^T * u 68 // No indices provided, Identity Restriction 69 if (!impl->indices) { 70 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 71 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 72 CeedPragmaSIMD 73 for (CeedInt k = 0; k < ncomp*elemsize; k++) 74 vv[(e+j)*ncomp*elemsize + k] 75 += uu[e*elemsize*ncomp + k*blksize + j - voffset]; 76 } else { 77 // Indices provided, standard or blocked restriction 78 // uu has shape [elemsize, ncomp, nelem] 79 // vv has shape [nnodes, ncomp] 80 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 81 for (CeedInt d = 0; d < ncomp; d++) 82 for (CeedInt i = 0; i < elemsize*blksize; i+=blksize) 83 // Iteration bound set to discard padding elements 84 for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++) 85 vv[lmode == CEED_NOTRANSPOSE 86 ? impl->indices[j+e*elemsize]+nnodes*d 87 : d+ncomp*impl->indices[j+e*elemsize]] 88 += uu[elemsize*(d*blksize+ncomp*e) + j - voffset]; 89 } 90 } 91 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 92 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 93 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 94 *request = NULL; 95 return 0; 96 } 97 98 //------------------------------------------------------------------------------ 99 // ElemRestriction Apply - Common Sizes 100 //------------------------------------------------------------------------------ 101 static int CeedElemRestrictionApply_Ref_11(CeedElemRestriction r, 102 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 103 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 104 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, lmode, 105 u, v, request); 106 } 107 108 static int CeedElemRestrictionApply_Ref_18(CeedElemRestriction r, 109 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 110 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 111 return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, lmode, 112 u, v, request); 113 114 } 115 116 static int CeedElemRestrictionApply_Ref_31(CeedElemRestriction r, 117 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 118 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 119 return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, lmode, 120 u, v, request); 121 } 122 123 static int CeedElemRestrictionApply_Ref_38(CeedElemRestriction r, 124 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 125 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 126 return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, lmode, 127 u, v, request); 128 } 129 130 //------------------------------------------------------------------------------ 131 // ElemRestriction Apply 132 //------------------------------------------------------------------------------ 133 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 134 CeedTransposeMode tmode, CeedVector u, 135 CeedVector v, CeedRequest *request) { 136 int ierr; 137 CeedInt numblk, ncomp, blksize; 138 CeedTransposeMode lmode; 139 ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 140 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 141 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 142 ierr = CeedElemRestrictionGetLMode(r, &lmode); CeedChk(ierr); 143 144 CeedInt idx = -1; 145 if (blksize < 10) 146 idx = 10*ncomp + blksize; 147 switch (idx) { 148 case 11: 149 return CeedElemRestrictionApply_Ref_11(r, 0, numblk, tmode, lmode, 150 u, v, request); 151 break; 152 case 18: 153 return CeedElemRestrictionApply_Ref_18(r, 0, numblk, tmode, lmode, 154 u, v, request); 155 break; 156 case 31: 157 return CeedElemRestrictionApply_Ref_31(r, 0, numblk, tmode, lmode, 158 u, v, request); 159 break; 160 case 38: 161 return CeedElemRestrictionApply_Ref_38(r, 0, numblk, tmode, lmode, 162 u, v, request); 163 break; 164 default: 165 // LCOV_EXCL_START 166 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk, 167 tmode, lmode, u, v, request); 168 // LCOV_EXCL_STOP 169 } 170 } 171 172 //------------------------------------------------------------------------------ 173 // ElemRestriction Apply Block 174 //------------------------------------------------------------------------------ 175 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 176 CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v, 177 CeedRequest *request) { 178 int ierr; 179 CeedInt ncomp, blksize; 180 CeedTransposeMode lmode; 181 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 182 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 183 ierr = CeedElemRestrictionGetLMode(r, &lmode); CeedChk(ierr); 184 185 CeedInt idx = -1; 186 if (blksize < 10) 187 idx = 10*ncomp + blksize; 188 switch (idx) { 189 case 11: 190 return CeedElemRestrictionApply_Ref_11(r, block, block+1, tmode, lmode, 191 u, v, request); 192 break; 193 case 18: 194 return CeedElemRestrictionApply_Ref_18(r, block, block+1, tmode, lmode, 195 u, v, request); 196 break; 197 case 31: 198 return CeedElemRestrictionApply_Ref_31(r, block, block+1, tmode, lmode, 199 u, v, request); 200 break; 201 case 38: 202 return CeedElemRestrictionApply_Ref_38(r, block, block+1, tmode, lmode, 203 u, v, request); 204 break; 205 default: 206 // LCOV_EXCL_START 207 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1, 208 tmode, lmode, u, v, request); 209 // LCOV_EXCL_STOP 210 } 211 } 212 213 //------------------------------------------------------------------------------ 214 // ElemRestriction Destroy 215 //------------------------------------------------------------------------------ 216 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 217 int ierr; 218 CeedElemRestriction_Ref *impl; 219 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 220 221 ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 222 ierr = CeedFree(&impl); CeedChk(ierr); 223 return 0; 224 } 225 226 //------------------------------------------------------------------------------ 227 // ElemRestriction Create 228 //------------------------------------------------------------------------------ 229 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode, 230 const CeedInt *indices, CeedElemRestriction r) { 231 int ierr; 232 CeedElemRestriction_Ref *impl; 233 CeedInt elemsize, nelem; 234 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 235 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 236 Ceed ceed; 237 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 238 239 if (mtype != CEED_MEM_HOST) 240 // LCOV_EXCL_START 241 return CeedError(ceed, 1, "Only MemType = HOST supported"); 242 // LCOV_EXCL_STOP 243 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 244 switch (cmode) { 245 case CEED_COPY_VALUES: 246 ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated); 247 CeedChk(ierr); 248 memcpy(impl->indices_allocated, indices, 249 nelem * elemsize * sizeof(indices[0])); 250 impl->indices = impl->indices_allocated; 251 break; 252 case CEED_OWN_POINTER: 253 impl->indices_allocated = (CeedInt *)indices; 254 impl->indices = impl->indices_allocated; 255 break; 256 case CEED_USE_POINTER: 257 impl->indices = indices; 258 } 259 260 ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr); 261 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 262 CeedElemRestrictionApply_Ref); CeedChk(ierr); 263 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 264 CeedElemRestrictionApplyBlock_Ref); 265 CeedChk(ierr); 266 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 267 CeedElemRestrictionDestroy_Ref); CeedChk(ierr); 268 return 0; 269 } 270 //------------------------------------------------------------------------------ 271