1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed-impl.h> 18 #include <string.h> 19 #include "ceed-blocked.h" 20 #include "../ref/ceed-ref.h" 21 22 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 23 CeedOperator_Blocked *impl = op->data; 24 int ierr; 25 26 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 27 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 28 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 29 } 30 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 31 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 32 ierr = CeedFree(&impl->edata); CeedChk(ierr); 33 34 for (CeedInt i=0; i<impl->numqin+impl->numqout; i++) { 35 ierr = CeedFree(&impl->qdata_alloc[i]); CeedChk(ierr); 36 } 37 ierr = CeedFree(&impl->qdata_alloc); CeedChk(ierr); 38 ierr = CeedFree(&impl->qdata); CeedChk(ierr); 39 40 ierr = CeedFree(&impl->indata); CeedChk(ierr); 41 ierr = CeedFree(&impl->outdata); CeedChk(ierr); 42 43 ierr = CeedFree(&op->data); CeedChk(ierr); 44 return 0; 45 } 46 47 /* 48 Setup infields or outfields 49 */ 50 static int CeedOperatorSetupFields_Blocked(struct CeedQFunctionField qfields[16], 51 struct CeedOperatorField ofields[16], 52 CeedElemRestriction *blkrestr, 53 CeedVector *evecs, CeedScalar **qdata, 54 CeedScalar **qdata_alloc, CeedScalar **indata, 55 CeedInt starti, CeedInt startq, 56 CeedInt numfields, CeedInt Q) { 57 CeedInt dim, ierr, iq=startq, ncomp; 58 const CeedInt blksize = 8; 59 60 // Loop over fields 61 for (CeedInt i=0; i<numfields; i++) { 62 CeedEvalMode emode = qfields[i].emode; 63 64 if (emode != CEED_EVAL_WEIGHT) { 65 CeedElemRestriction r = ofields[i].Erestrict; 66 CeedElemRestriction_Ref *data = r->data; 67 ierr = CeedElemRestrictionCreateBlocked(r->ceed, r->nelem, r->elemsize, 68 blksize, r->ndof, r->ncomp, 69 CEED_MEM_HOST, CEED_COPY_VALUES, 70 data->indices, &blkrestr[i+starti]); 71 CeedChk(ierr); 72 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starti], NULL, 73 &evecs[i+starti]); 74 CeedChk(ierr); 75 } 76 77 switch(emode) { 78 case CEED_EVAL_NONE: 79 break; // No action 80 case CEED_EVAL_INTERP: 81 ncomp = qfields[i].ncomp; 82 ierr = CeedMalloc(Q*ncomp*blksize, &qdata_alloc[iq]); CeedChk(ierr); 83 qdata[i + starti] = qdata_alloc[iq]; 84 iq++; 85 break; 86 case CEED_EVAL_GRAD: 87 ncomp = qfields[i].ncomp; 88 dim = ofields[i].basis->dim; 89 ierr = CeedMalloc(Q*ncomp*dim*blksize, &qdata_alloc[iq]); CeedChk(ierr); 90 qdata[i + starti] = qdata_alloc[iq]; 91 iq++; 92 break; 93 case CEED_EVAL_WEIGHT: // Only on input fields 94 ierr = CeedMalloc(Q*blksize, &qdata_alloc[iq]); CeedChk(ierr); 95 ierr = CeedBasisApply(ofields[iq].basis, blksize, CEED_NOTRANSPOSE, 96 CEED_EVAL_WEIGHT, NULL, qdata_alloc[iq]); CeedChk(ierr); 97 qdata[i] = qdata_alloc[iq]; 98 indata[i] = qdata[i]; 99 iq++; 100 break; 101 case CEED_EVAL_DIV: 102 break; // Not implimented 103 case CEED_EVAL_CURL: 104 break; // Not implimented 105 } 106 } 107 return 0; 108 } 109 110 /* 111 CeedOperator needs to connect all the named fields (be they active or passive) 112 to the named inputs and outputs of its CeedQFunction. 113 */ 114 static int CeedOperatorSetup_Blocked(CeedOperator op) { 115 if (op->setupdone) return 0; 116 CeedOperator_Blocked *impl = op->data; 117 CeedQFunction qf = op->qf; 118 CeedInt Q = op->numqpoints, numinputfields, numoutputfields; 119 int ierr; 120 121 // Count infield and outfield array sizes and evectors 122 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 123 CeedChk(ierr); 124 impl->numein = numinputfields; 125 for (CeedInt i=0; i<numinputfields; i++) { 126 CeedEvalMode emode = qf->inputfields[i].emode; 127 impl->numqin += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD) + 128 !!(emode & CEED_EVAL_WEIGHT); 129 } 130 impl->numeout = numoutputfields; 131 for (CeedInt i=0; i<numoutputfields; i++) { 132 CeedEvalMode emode = qf->outputfields[i].emode; 133 impl->numqout += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD); 134 } 135 136 // Allocate 137 ierr = CeedCalloc(impl->numein + impl->numeout, &impl->blkrestr); 138 CeedChk(ierr); 139 ierr = CeedCalloc(impl->numein + impl->numeout, &impl->evecs); 140 CeedChk(ierr); 141 ierr = CeedCalloc(impl->numein + impl->numeout, &impl->edata); 142 CeedChk(ierr); 143 144 ierr = CeedCalloc(impl->numqin + impl->numqout, &impl->qdata_alloc); 145 CeedChk(ierr); 146 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->qdata); 147 CeedChk(ierr); 148 149 ierr = CeedCalloc(16, &impl->indata); CeedChk(ierr); 150 ierr = CeedCalloc(16, &impl->outdata); CeedChk(ierr); 151 // Set up infield and outfield pointer arrays 152 // Infields 153 ierr = CeedOperatorSetupFields_Blocked(qf->inputfields, op->inputfields, 154 impl->blkrestr, impl->evecs, 155 impl->qdata, impl->qdata_alloc, 156 impl->indata, 0, 157 0, numinputfields, Q); 158 CeedChk(ierr); 159 // Outfields 160 ierr = CeedOperatorSetupFields_Blocked(qf->outputfields, op->outputfields, 161 impl->blkrestr, impl->evecs, 162 impl->qdata, impl->qdata_alloc, 163 impl->indata, numinputfields, 164 impl->numqin, numoutputfields, Q); 165 CeedChk(ierr); 166 // Input Qvecs 167 for (CeedInt i=0; i<numinputfields; i++) { 168 CeedEvalMode emode = qf->inputfields[i].emode; 169 if ((emode != CEED_EVAL_NONE) && (emode != CEED_EVAL_WEIGHT)) 170 impl->indata[i] = impl->qdata[i]; 171 } 172 // Output Qvecs 173 for (CeedInt i=0; i<numoutputfields; i++) { 174 CeedEvalMode emode = qf->outputfields[i].emode; 175 if (emode != CEED_EVAL_NONE) 176 impl->outdata[i] = impl->qdata[i + numinputfields]; 177 } 178 179 op->setupdone = 1; 180 181 return 0; 182 } 183 184 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 185 CeedVector outvec, CeedRequest *request) { 186 CeedOperator_Blocked *impl = op->data; 187 const CeedInt blksize = 8; 188 CeedInt Q = op->numqpoints, elemsize, numinputfields, numoutputfields, 189 nblks = (op->numelements/blksize) + !!(op->numelements%blksize); 190 int ierr; 191 CeedQFunction qf = op->qf; 192 CeedTransposeMode lmode = CEED_NOTRANSPOSE; 193 194 // Setup 195 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 196 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 197 CeedChk(ierr); 198 199 // Input Evecs and Restriction 200 for (CeedInt i=0; i<numinputfields; i++) { 201 CeedEvalMode emode = qf->inputfields[i].emode; 202 if (emode == CEED_EVAL_WEIGHT) { // Skip 203 } else { 204 // Active 205 // Restrict 206 if (op->inputfields[i].vec == CEED_VECTOR_ACTIVE) { 207 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 208 lmode, invec, impl->evecs[i], 209 request); CeedChk(ierr); CeedChk(ierr); 210 } else { 211 // Passive 212 // Restrict 213 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 214 lmode, op->inputfields[i].vec, impl->evecs[i], 215 request); CeedChk(ierr); 216 } 217 // Get evec 218 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 219 (const CeedScalar **) &impl->edata[i]); 220 CeedChk(ierr); 221 } 222 } 223 224 // Output Evecs 225 for (CeedInt i=0; i<numoutputfields; i++) { 226 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 227 &impl->edata[i + numinputfields]); CeedChk(ierr); 228 } 229 230 // Loop through elements 231 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 232 // Input basis apply if needed 233 for (CeedInt i=0; i<numinputfields; i++) { 234 // Get elemsize, emode, ncomp 235 elemsize = op->inputfields[i].Erestrict->elemsize; 236 CeedEvalMode emode = qf->inputfields[i].emode; 237 CeedInt ncomp = qf->inputfields[i].ncomp; 238 // Basis action 239 switch(emode) { 240 case CEED_EVAL_NONE: 241 impl->indata[i] = &impl->edata[i][e*Q*ncomp]; 242 break; 243 case CEED_EVAL_INTERP: 244 ierr = CeedBasisApply(op->inputfields[i].basis, blksize, CEED_NOTRANSPOSE, 245 CEED_EVAL_INTERP, &impl->edata[i][e*elemsize*ncomp], impl->qdata[i]); 246 CeedChk(ierr); 247 break; 248 case CEED_EVAL_GRAD: 249 ierr = CeedBasisApply(op->inputfields[i].basis, blksize, CEED_NOTRANSPOSE, 250 CEED_EVAL_GRAD, &impl->edata[i][e*elemsize*ncomp], impl->qdata[i]); 251 CeedChk(ierr); 252 break; 253 case CEED_EVAL_WEIGHT: 254 break; // No action 255 case CEED_EVAL_DIV: 256 break; // Not implimented 257 case CEED_EVAL_CURL: 258 break; // Not implimented 259 } 260 } 261 262 // Output pointers 263 for (CeedInt i=0; i<numoutputfields; i++) { 264 CeedEvalMode emode = qf->outputfields[i].emode; 265 if (emode == CEED_EVAL_NONE) { 266 CeedInt ncomp = qf->outputfields[i].ncomp; 267 impl->outdata[i] = &impl->edata[i + numinputfields][e*Q*ncomp]; 268 } 269 } 270 // Q function 271 ierr = CeedQFunctionApply(op->qf, Q*blksize, 272 (const CeedScalar * const*) impl->indata, 273 impl->outdata); CeedChk(ierr); 274 275 // Output basis apply if needed 276 for (CeedInt i=0; i<numoutputfields; i++) { 277 // Get elemsize, emode, ncomp 278 elemsize = op->outputfields[i].Erestrict->elemsize; 279 CeedInt ncomp = qf->outputfields[i].ncomp; 280 CeedEvalMode emode = qf->outputfields[i].emode; 281 // Basis action 282 switch(emode) { 283 case CEED_EVAL_NONE: 284 break; // No action 285 case CEED_EVAL_INTERP: 286 ierr = CeedBasisApply(op->outputfields[i].basis, blksize, CEED_TRANSPOSE, 287 CEED_EVAL_INTERP, impl->outdata[i], 288 &impl->edata[i + numinputfields][e*elemsize*ncomp]); 289 CeedChk(ierr); 290 break; 291 case CEED_EVAL_GRAD: 292 ierr = CeedBasisApply(op->outputfields[i].basis, blksize, CEED_TRANSPOSE, 293 CEED_EVAL_GRAD, 294 impl->outdata[i], &impl->edata[i + numinputfields][e*elemsize*ncomp]); 295 CeedChk(ierr); 296 break; 297 case CEED_EVAL_WEIGHT: 298 return CeedError(op->ceed, 1, 299 "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); 300 break; // Should not occur 301 case CEED_EVAL_DIV: 302 break; // Not implimented 303 case CEED_EVAL_CURL: 304 break; // Not implimented 305 } 306 } 307 } 308 309 // Zero lvecs 310 ierr = CeedVectorSetValue(outvec, 0.0); CeedChk(ierr); 311 for (CeedInt i=0; i<qf->numoutputfields; i++) 312 if (op->outputfields[i].vec != CEED_VECTOR_ACTIVE) { 313 ierr = CeedVectorSetValue(op->outputfields[i].vec, 0.0); CeedChk(ierr); 314 } 315 316 // Output restriction 317 for (CeedInt i=0; i<numoutputfields; i++) { 318 // Restore evec 319 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 320 &impl->edata[i + numinputfields]); CeedChk(ierr); 321 // Active 322 if (op->outputfields[i].vec == CEED_VECTOR_ACTIVE) { 323 // Restrict 324 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE, 325 lmode, impl->evecs[i+impl->numein], outvec, request); CeedChk(ierr); 326 } else { 327 // Passive 328 // Restrict 329 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE, 330 lmode, impl->evecs[i+impl->numein], op->outputfields[i].vec, 331 request); CeedChk(ierr); 332 } 333 } 334 335 // Restore input arrays 336 for (CeedInt i=0; i<numinputfields; i++) { 337 CeedEvalMode emode = qf->inputfields[i].emode; 338 if (emode == CEED_EVAL_WEIGHT) { // Skip 339 } else { 340 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 341 (const CeedScalar **) &impl->edata[i]); CeedChk(ierr); 342 } 343 } 344 345 return 0; 346 } 347 348 int CeedOperatorCreate_Blocked(CeedOperator op) { 349 CeedOperator_Blocked *impl; 350 int ierr; 351 352 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 353 op->data = impl; 354 op->Destroy = CeedOperatorDestroy_Blocked; 355 op->Apply = CeedOperatorApply_Blocked; 356 return 0; 357 } 358