1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-ref.h" 18 19 //------------------------------------------------------------------------------ 20 // Core ElemRestriction Apply Code 21 //------------------------------------------------------------------------------ 22 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 23 const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop, 24 CeedTransposeMode tmode, CeedTransposeMode lmode, CeedVector u, 25 CeedVector v, CeedRequest *request) { 26 int ierr; 27 CeedElemRestriction_Ref *impl; 28 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);; 29 const CeedScalar *uu; 30 CeedScalar *vv; 31 CeedInt nelem, elemsize, nnodes, voffset; 32 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 33 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 34 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 35 voffset = start*blksize*elemsize*ncomp; 36 37 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 38 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 39 // Restriction from lvector to evector 40 // Perform: v = r * u 41 if (tmode == CEED_NOTRANSPOSE) { 42 // No indices provided, Identity Restriction 43 if (!impl->indices) { 44 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 45 CeedPragmaSIMD 46 for (CeedInt k = 0; k < ncomp*elemsize; k++) 47 CeedPragmaSIMD 48 for (CeedInt j = 0; j < blksize; j++) 49 vv[e*elemsize*ncomp + k*blksize + j - voffset] 50 = uu[CeedIntMin(e+j, nelem-1)*ncomp*elemsize + k]; 51 } else { 52 // Indices provided, standard or blocked restriction 53 // vv has shape [elemsize, ncomp, nelem], row-major 54 // uu has shape [nnodes, ncomp] 55 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 56 CeedPragmaSIMD 57 for (CeedInt d = 0; d < ncomp; d++) 58 CeedPragmaSIMD 59 for (CeedInt i = 0; i < elemsize*blksize; i++) 60 vv[elemsize*(d*blksize+ncomp*e) + i - voffset] 61 = uu[lmode == CEED_NOTRANSPOSE 62 ? impl->indices[i+elemsize*e]+nnodes*d 63 : d+ncomp*impl->indices[i+elemsize*e]]; 64 } 65 } else { 66 // Restriction from evector to lvector 67 // Performing v += r^T * u 68 // No indices provided, Identity Restriction 69 if (!impl->indices) { 70 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 71 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 72 CeedPragmaSIMD 73 for (CeedInt k = 0; k < ncomp*elemsize; k++) 74 vv[(e+j)*ncomp*elemsize + k] 75 += uu[e*elemsize*ncomp + k*blksize + j - voffset]; 76 } else { 77 // Indices provided, standard or blocked restriction 78 // uu has shape [elemsize, ncomp, nelem] 79 // vv has shape [nnodes, ncomp] 80 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 81 for (CeedInt d = 0; d < ncomp; d++) 82 for (CeedInt i = 0; i < elemsize*blksize; i+=blksize) 83 // Iteration bound set to discard padding elements 84 for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++) 85 vv[lmode == CEED_NOTRANSPOSE 86 ? impl->indices[j+e*elemsize]+nnodes*d 87 : d+ncomp*impl->indices[j+e*elemsize]] 88 += uu[elemsize*(d*blksize+ncomp*e) + j - voffset]; 89 } 90 } 91 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 92 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 93 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 94 *request = NULL; 95 return 0; 96 } 97 98 //------------------------------------------------------------------------------ 99 // ElemRestriction Apply - Common Sizes 100 //------------------------------------------------------------------------------ 101 static int CeedElemRestrictionApply_Ref_11(CeedElemRestriction r, 102 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 103 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 104 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, lmode, 105 u, v, request); 106 } 107 108 static int CeedElemRestrictionApply_Ref_18(CeedElemRestriction r, 109 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 110 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 111 return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, lmode, 112 u, v, request); 113 114 } 115 116 static int CeedElemRestrictionApply_Ref_31(CeedElemRestriction r, 117 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 118 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 119 return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, lmode, 120 u, v, request); 121 } 122 123 static int CeedElemRestrictionApply_Ref_38(CeedElemRestriction r, 124 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 125 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 126 return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, lmode, 127 u, v, request); 128 } 129 130 //------------------------------------------------------------------------------ 131 // ElemRestriction Apply 132 //------------------------------------------------------------------------------ 133 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 134 CeedTransposeMode tmode, 135 CeedTransposeMode lmode, CeedVector u, 136 CeedVector v, CeedRequest *request) { 137 int ierr; 138 CeedInt numblk, ncomp, blksize; 139 ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 140 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 141 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 142 143 CeedInt idx = -1; 144 if (blksize < 10) 145 idx = 10*ncomp + blksize; 146 switch (idx) { 147 case 11: 148 return CeedElemRestrictionApply_Ref_11(r, 0, numblk, tmode, lmode, 149 u, v, request); 150 break; 151 case 18: 152 return CeedElemRestrictionApply_Ref_18(r, 0, numblk, tmode, lmode, 153 u, v, request); 154 break; 155 case 31: 156 return CeedElemRestrictionApply_Ref_31(r, 0, numblk, tmode, lmode, 157 u, v, request); 158 break; 159 case 38: 160 return CeedElemRestrictionApply_Ref_38(r, 0, numblk, tmode, lmode, 161 u, v, request); 162 break; 163 default: 164 // LCOV_EXCL_START 165 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk, 166 tmode, lmode, u, v, request); 167 // LCOV_EXCL_STOP 168 } 169 } 170 171 //------------------------------------------------------------------------------ 172 // ElemRestriction Apply Block 173 //------------------------------------------------------------------------------ 174 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 175 CeedInt block, CeedTransposeMode tmode, CeedTransposeMode lmode, 176 CeedVector u, CeedVector v, CeedRequest *request) { 177 int ierr; 178 CeedInt ncomp, blksize; 179 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 180 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 181 182 CeedInt idx = -1; 183 if (blksize < 10) 184 idx = 10*ncomp + blksize; 185 switch (idx) { 186 case 11: 187 return CeedElemRestrictionApply_Ref_11(r, block, block+1, tmode, lmode, 188 u, v, request); 189 break; 190 case 18: 191 return CeedElemRestrictionApply_Ref_18(r, block, block+1, tmode, lmode, 192 u, v, request); 193 break; 194 case 31: 195 return CeedElemRestrictionApply_Ref_31(r, block, block+1, tmode, lmode, 196 u, v, request); 197 break; 198 case 38: 199 return CeedElemRestrictionApply_Ref_38(r, block, block+1, tmode, lmode, 200 u, v, request); 201 break; 202 default: 203 // LCOV_EXCL_START 204 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1, 205 tmode, lmode, u, v, request); 206 // LCOV_EXCL_STOP 207 } 208 } 209 210 //------------------------------------------------------------------------------ 211 // ElemRestriction Destroy 212 //------------------------------------------------------------------------------ 213 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 214 int ierr; 215 CeedElemRestriction_Ref *impl; 216 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 217 218 ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 219 ierr = CeedFree(&impl); CeedChk(ierr); 220 return 0; 221 } 222 223 //------------------------------------------------------------------------------ 224 // ElemRestriction Create 225 //------------------------------------------------------------------------------ 226 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode, 227 const CeedInt *indices, CeedElemRestriction r) { 228 int ierr; 229 CeedElemRestriction_Ref *impl; 230 CeedInt elemsize, nelem; 231 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 232 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 233 Ceed ceed; 234 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 235 236 if (mtype != CEED_MEM_HOST) 237 // LCOV_EXCL_START 238 return CeedError(ceed, 1, "Only MemType = HOST supported"); 239 // LCOV_EXCL_STOP 240 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 241 switch (cmode) { 242 case CEED_COPY_VALUES: 243 ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated); 244 CeedChk(ierr); 245 memcpy(impl->indices_allocated, indices, 246 nelem * elemsize * sizeof(indices[0])); 247 impl->indices = impl->indices_allocated; 248 break; 249 case CEED_OWN_POINTER: 250 impl->indices_allocated = (CeedInt *)indices; 251 impl->indices = impl->indices_allocated; 252 break; 253 case CEED_USE_POINTER: 254 impl->indices = indices; 255 } 256 257 ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr); 258 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 259 CeedElemRestrictionApply_Ref); CeedChk(ierr); 260 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 261 CeedElemRestrictionApplyBlock_Ref); 262 CeedChk(ierr); 263 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 264 CeedElemRestrictionDestroy_Ref); CeedChk(ierr); 265 return 0; 266 } 267 //------------------------------------------------------------------------------ 268