1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-ref.h" 18 19 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 20 const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop, 21 CeedTransposeMode tmode, CeedTransposeMode lmode, CeedVector u, 22 CeedVector v, CeedRequest *request) { 23 int ierr; 24 CeedElemRestriction_Ref *impl; 25 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);; 26 const CeedScalar *uu; 27 CeedScalar *vv; 28 CeedInt nelem, elemsize, nnodes, voffset; 29 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 30 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 31 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 32 voffset = start*blksize*elemsize*ncomp; 33 34 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 35 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 36 // Restriction from lvector to evector 37 // Perform: v = r * u 38 if (tmode == CEED_NOTRANSPOSE) { 39 // No indices provided, Identity Restriction 40 if (!impl->indices) { 41 CeedPragmaSIMD 42 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 43 CeedPragmaSIMD 44 for (CeedInt j = 0; j < blksize; j++) 45 CeedPragmaSIMD 46 for (CeedInt k = 0; k < ncomp*elemsize; k++) 47 vv[e*elemsize*ncomp + k*blksize + j - voffset] 48 = uu[CeedIntMin(e+j, nelem-1)*ncomp*elemsize + k]; 49 } else { 50 // Indices provided, standard or blocked restriction 51 // vv has shape [elemsize, ncomp, nelem], row-major 52 // uu has shape [nnodes, ncomp] 53 CeedPragmaSIMD 54 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 55 CeedPragmaSIMD 56 for (CeedInt d = 0; d < ncomp; d++) 57 CeedPragmaSIMD 58 for (CeedInt i = 0; i < elemsize*blksize; i++) 59 vv[i+elemsize*(d*blksize+ncomp*e) - voffset] 60 = uu[lmode == CEED_NOTRANSPOSE 61 ? impl->indices[i+elemsize*e]+nnodes*d 62 : d+ncomp*impl->indices[i+elemsize*e]]; 63 } 64 } else { 65 // Restriction from evector to lvector 66 // Performing v += r^T * u 67 // No indices provided, Identity Restriction 68 if (!impl->indices) { 69 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 70 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 71 for (CeedInt k = 0; k < ncomp*elemsize; k++) 72 vv[(e+j)*ncomp*elemsize + k] 73 += uu[e*elemsize*ncomp + k*blksize + j - voffset]; 74 } else { 75 // Indices provided, standard or blocked restriction 76 // uu has shape [elemsize, ncomp, nelem] 77 // vv has shape [nnodes, ncomp] 78 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 79 for (CeedInt d = 0; d < ncomp; d++) 80 for (CeedInt i = 0; i < elemsize*blksize; i+=blksize) 81 // Iteration bound set to discard padding elements 82 for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++) 83 vv[lmode == CEED_NOTRANSPOSE 84 ? impl->indices[j+e*elemsize]+nnodes*d 85 : d+ncomp*impl->indices[j+e*elemsize]] 86 += uu[elemsize*(d*blksize+ncomp*e) + j - voffset]; 87 } 88 } 89 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 90 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 91 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 92 *request = NULL; 93 return 0; 94 } 95 96 static int CeedElemRestrictionApply_Ref_Core_11(CeedElemRestriction r, 97 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 98 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 99 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, lmode, 100 u, v, request); 101 } 102 103 static int CeedElemRestrictionApply_Ref_Core_18(CeedElemRestriction r, 104 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 105 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 106 return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, lmode, 107 u, v, request); 108 109 } 110 111 static int CeedElemRestrictionApply_Ref_Core_31(CeedElemRestriction r, 112 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 113 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 114 return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, lmode, 115 u, v, request); 116 } 117 118 static int CeedElemRestrictionApply_Ref_Core_38(CeedElemRestriction r, 119 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 120 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 121 return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, lmode, 122 u, v, request); 123 } 124 125 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 126 CeedTransposeMode tmode, 127 CeedTransposeMode lmode, CeedVector u, 128 CeedVector v, CeedRequest *request) { 129 int ierr; 130 CeedInt numblk, ncomp, blksize; 131 ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 132 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 133 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 134 135 CeedInt idx = -1; 136 if (blksize < 10) 137 idx = 10*ncomp + blksize; 138 switch (idx) { 139 case 11: 140 return CeedElemRestrictionApply_Ref_Core_11(r, 0, numblk, tmode, lmode, u, 141 v, request); 142 break; 143 case 18: 144 return CeedElemRestrictionApply_Ref_Core_18(r, 0, numblk, tmode, lmode, u, 145 v, request); 146 break; 147 case 31: 148 return CeedElemRestrictionApply_Ref_Core_31(r, 0, numblk, tmode, lmode, u, 149 v, request); 150 break; 151 case 38: 152 return CeedElemRestrictionApply_Ref_Core_38(r, 0, numblk, tmode, lmode, u, 153 v, request); 154 break; 155 default: 156 // LCOV_EXCL_START 157 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk, 158 tmode, lmode, u, v, request); 159 // LCOV_EXCL_STOP 160 } 161 } 162 163 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 164 CeedInt block, CeedTransposeMode tmode, CeedTransposeMode lmode, 165 CeedVector u, CeedVector v, CeedRequest *request) { 166 int ierr; 167 CeedInt ncomp, blksize; 168 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 169 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 170 171 CeedInt idx = -1; 172 if (blksize < 10) 173 idx = 10*ncomp + blksize; 174 switch (idx) { 175 case 11: 176 return CeedElemRestrictionApply_Ref_Core_11(r, block, block+1, tmode, lmode, 177 u, v, request); 178 break; 179 case 18: 180 return CeedElemRestrictionApply_Ref_Core_18(r, block, block+1, tmode, lmode, 181 u, v, request); 182 break; 183 case 31: 184 return CeedElemRestrictionApply_Ref_Core_31(r, block, block+1, tmode, lmode, 185 u, v, request); 186 break; 187 case 38: 188 return CeedElemRestrictionApply_Ref_Core_38(r, block, block+1, tmode, lmode, 189 u, v, request); 190 break; 191 default: 192 // LCOV_EXCL_START 193 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1, 194 tmode, lmode, u, v, request); 195 // LCOV_EXCL_STOP 196 } 197 } 198 199 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 200 int ierr; 201 CeedElemRestriction_Ref *impl; 202 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 203 204 ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 205 ierr = CeedFree(&impl); CeedChk(ierr); 206 return 0; 207 } 208 209 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode, 210 const CeedInt *indices, CeedElemRestriction r) { 211 int ierr; 212 CeedElemRestriction_Ref *impl; 213 CeedInt elemsize, nelem; 214 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 215 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 216 Ceed ceed; 217 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 218 219 if (mtype != CEED_MEM_HOST) 220 // LCOV_EXCL_START 221 return CeedError(ceed, 1, "Only MemType = HOST supported"); 222 // LCOV_EXCL_STOP 223 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 224 switch (cmode) { 225 case CEED_COPY_VALUES: 226 ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated); 227 CeedChk(ierr); 228 memcpy(impl->indices_allocated, indices, 229 nelem * elemsize * sizeof(indices[0])); 230 impl->indices = impl->indices_allocated; 231 break; 232 case CEED_OWN_POINTER: 233 impl->indices_allocated = (CeedInt *)indices; 234 impl->indices = impl->indices_allocated; 235 break; 236 case CEED_USE_POINTER: 237 impl->indices = indices; 238 } 239 240 ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr); 241 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 242 CeedElemRestrictionApply_Ref); CeedChk(ierr); 243 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 244 CeedElemRestrictionApplyBlock_Ref); 245 CeedChk(ierr); 246 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 247 CeedElemRestrictionDestroy_Ref); CeedChk(ierr); 248 return 0; 249 } 250