1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-ref.h" 18 19 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 20 const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop, 21 CeedTransposeMode tmode, CeedTransposeMode lmode, CeedVector u, 22 CeedVector v, CeedRequest *request) { 23 int ierr; 24 CeedElemRestriction_Ref *impl; 25 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);; 26 const CeedScalar *uu; 27 CeedScalar *vv; 28 CeedInt nelem, elemsize, nnodes, voffset; 29 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 30 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 31 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 32 voffset = start*blksize*elemsize*ncomp; 33 34 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 35 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 36 // Restriction from lvector to evector 37 // Perform: v = r * u 38 if (tmode == CEED_NOTRANSPOSE) { 39 // No indices provided, Identity Restriction 40 if (!impl->indices) { 41 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 42 CeedPragmaSIMD 43 for (CeedInt k = 0; k < ncomp*elemsize; k++) 44 CeedPragmaSIMD 45 for (CeedInt j = 0; j < blksize; j++) 46 vv[e*elemsize*ncomp + k*blksize + j - voffset] 47 = uu[CeedIntMin(e+j, nelem-1)*ncomp*elemsize + k]; 48 } else { 49 // Indices provided, standard or blocked restriction 50 // vv has shape [elemsize, ncomp, nelem], row-major 51 // uu has shape [nnodes, ncomp] 52 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 53 CeedPragmaSIMD 54 for (CeedInt d = 0; d < ncomp; d++) 55 CeedPragmaSIMD 56 for (CeedInt i = 0; i < elemsize*blksize; i++) 57 vv[elemsize*(d*blksize+ncomp*e) + i - voffset] 58 = uu[lmode == CEED_NOTRANSPOSE 59 ? impl->indices[i+elemsize*e]+nnodes*d 60 : d+ncomp*impl->indices[i+elemsize*e]]; 61 } 62 } else { 63 // Restriction from evector to lvector 64 // Performing v += r^T * u 65 // No indices provided, Identity Restriction 66 if (!impl->indices) { 67 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 68 for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 69 CeedPragmaSIMD 70 for (CeedInt k = 0; k < ncomp*elemsize; k++) 71 vv[(e+j)*ncomp*elemsize + k] 72 += uu[e*elemsize*ncomp + k*blksize + j - voffset]; 73 } else { 74 // Indices provided, standard or blocked restriction 75 // uu has shape [elemsize, ncomp, nelem] 76 // vv has shape [nnodes, ncomp] 77 for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 78 for (CeedInt d = 0; d < ncomp; d++) 79 for (CeedInt i = 0; i < elemsize*blksize; i+=blksize) 80 // Iteration bound set to discard padding elements 81 for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++) 82 vv[lmode == CEED_NOTRANSPOSE 83 ? impl->indices[j+e*elemsize]+nnodes*d 84 : d+ncomp*impl->indices[j+e*elemsize]] 85 += uu[elemsize*(d*blksize+ncomp*e) + j - voffset]; 86 } 87 } 88 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 89 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 90 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 91 *request = NULL; 92 return 0; 93 } 94 95 static int CeedElemRestrictionApply_Ref_11(CeedElemRestriction r, 96 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 97 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 98 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, lmode, 99 u, v, request); 100 } 101 102 static int CeedElemRestrictionApply_Ref_18(CeedElemRestriction r, 103 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 104 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 105 return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, lmode, 106 u, v, request); 107 108 } 109 110 static int CeedElemRestrictionApply_Ref_31(CeedElemRestriction r, 111 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 112 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 113 return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, lmode, 114 u, v, request); 115 } 116 117 static int CeedElemRestrictionApply_Ref_38(CeedElemRestriction r, 118 CeedInt start, CeedInt stop, CeedTransposeMode tmode, 119 CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) { 120 return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, lmode, 121 u, v, request); 122 } 123 124 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 125 CeedTransposeMode tmode, 126 CeedTransposeMode lmode, CeedVector u, 127 CeedVector v, CeedRequest *request) { 128 int ierr; 129 CeedInt numblk, ncomp, blksize; 130 ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 131 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 132 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 133 134 CeedInt idx = -1; 135 if (blksize < 10) 136 idx = 10*ncomp + blksize; 137 switch (idx) { 138 case 11: 139 return CeedElemRestrictionApply_Ref_11(r, 0, numblk, tmode, lmode, 140 u, v, request); 141 break; 142 case 18: 143 return CeedElemRestrictionApply_Ref_18(r, 0, numblk, tmode, lmode, 144 u, v, request); 145 break; 146 case 31: 147 return CeedElemRestrictionApply_Ref_31(r, 0, numblk, tmode, lmode, 148 u, v, request); 149 break; 150 case 38: 151 return CeedElemRestrictionApply_Ref_38(r, 0, numblk, tmode, lmode, 152 u, v, request); 153 break; 154 default: 155 // LCOV_EXCL_START 156 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk, 157 tmode, lmode, u, v, request); 158 // LCOV_EXCL_STOP 159 } 160 } 161 162 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 163 CeedInt block, CeedTransposeMode tmode, CeedTransposeMode lmode, 164 CeedVector u, CeedVector v, CeedRequest *request) { 165 int ierr; 166 CeedInt ncomp, blksize; 167 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 168 ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 169 170 CeedInt idx = -1; 171 if (blksize < 10) 172 idx = 10*ncomp + blksize; 173 switch (idx) { 174 case 11: 175 return CeedElemRestrictionApply_Ref_11(r, block, block+1, tmode, lmode, 176 u, v, request); 177 break; 178 case 18: 179 return CeedElemRestrictionApply_Ref_18(r, block, block+1, tmode, lmode, 180 u, v, request); 181 break; 182 case 31: 183 return CeedElemRestrictionApply_Ref_31(r, block, block+1, tmode, lmode, 184 u, v, request); 185 break; 186 case 38: 187 return CeedElemRestrictionApply_Ref_38(r, block, block+1, tmode, lmode, 188 u, v, request); 189 break; 190 default: 191 // LCOV_EXCL_START 192 return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1, 193 tmode, lmode, u, v, request); 194 // LCOV_EXCL_STOP 195 } 196 } 197 198 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 199 int ierr; 200 CeedElemRestriction_Ref *impl; 201 ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr); 202 203 ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 204 ierr = CeedFree(&impl); CeedChk(ierr); 205 return 0; 206 } 207 208 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode, 209 const CeedInt *indices, CeedElemRestriction r) { 210 int ierr; 211 CeedElemRestriction_Ref *impl; 212 CeedInt elemsize, nelem; 213 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 214 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 215 Ceed ceed; 216 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 217 218 if (mtype != CEED_MEM_HOST) 219 // LCOV_EXCL_START 220 return CeedError(ceed, 1, "Only MemType = HOST supported"); 221 // LCOV_EXCL_STOP 222 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 223 switch (cmode) { 224 case CEED_COPY_VALUES: 225 ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated); 226 CeedChk(ierr); 227 memcpy(impl->indices_allocated, indices, 228 nelem * elemsize * sizeof(indices[0])); 229 impl->indices = impl->indices_allocated; 230 break; 231 case CEED_OWN_POINTER: 232 impl->indices_allocated = (CeedInt *)indices; 233 impl->indices = impl->indices_allocated; 234 break; 235 case CEED_USE_POINTER: 236 impl->indices = indices; 237 } 238 239 ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr); 240 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 241 CeedElemRestrictionApply_Ref); CeedChk(ierr); 242 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 243 CeedElemRestrictionApplyBlock_Ref); 244 CeedChk(ierr); 245 ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 246 CeedElemRestrictionDestroy_Ref); CeedChk(ierr); 247 return 0; 248 } 249