1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <string.h> 18 #include "ceed-blocked.h" 19 #include "../ref/ceed-ref.h" 20 21 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 22 int ierr; 23 CeedOperator_Blocked *impl; 24 ierr = CeedOperatorGetData(op, (void*)&impl); CeedChk(ierr); 25 26 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 27 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 28 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 29 } 30 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 31 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 32 ierr = CeedFree(&impl->edata); CeedChk(ierr); 33 34 for (CeedInt i=0; i<impl->numqin+impl->numqout; i++) { 35 ierr = CeedFree(&impl->qdata_alloc[i]); CeedChk(ierr); 36 } 37 ierr = CeedFree(&impl->qdata_alloc); CeedChk(ierr); 38 ierr = CeedFree(&impl->qdata); CeedChk(ierr); 39 40 ierr = CeedFree(&impl->indata); CeedChk(ierr); 41 ierr = CeedFree(&impl->outdata); CeedChk(ierr); 42 43 ierr = CeedFree(&op->data); CeedChk(ierr); 44 return 0; 45 } 46 47 /* 48 Setup infields or outfields 49 */ 50 static int CeedOperatorSetupFields_Blocked(struct CeedQFunctionField qfields[16], 51 struct CeedOperatorField ofields[16], 52 CeedElemRestriction *blkrestr, 53 CeedVector *evecs, CeedScalar **qdata, 54 CeedScalar **qdata_alloc, CeedScalar **indata, 55 CeedInt starti, CeedInt startq, 56 CeedInt numfields, CeedInt Q) { 57 CeedInt dim, ierr, iq=startq, ncomp; 58 const CeedInt blksize = 8; 59 60 // Loop over fields 61 for (CeedInt i=0; i<numfields; i++) { 62 CeedEvalMode emode = qfields[i].emode; 63 64 if (emode != CEED_EVAL_WEIGHT) { 65 CeedElemRestriction r = ofields[i].Erestrict; 66 CeedElemRestriction_Ref *data = r->data; 67 Ceed ceed; 68 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 69 CeedInt nelem, elemsize, ndof, ncomp; 70 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 71 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 72 ierr = CeedElemRestrictionGetNumDoF(r, &ndof); CeedChk(ierr); 73 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 74 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 75 blksize, ndof, ncomp, 76 CEED_MEM_HOST, CEED_COPY_VALUES, 77 data->indices, &blkrestr[i+starti]); 78 CeedChk(ierr); 79 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starti], NULL, 80 &evecs[i+starti]); 81 CeedChk(ierr); 82 } 83 84 switch(emode) { 85 case CEED_EVAL_NONE: 86 break; // No action 87 case CEED_EVAL_INTERP: 88 ncomp = qfields[i].ncomp; 89 ierr = CeedMalloc(Q*ncomp*blksize, &qdata_alloc[iq]); CeedChk(ierr); 90 qdata[i + starti] = qdata_alloc[iq]; 91 iq++; 92 break; 93 case CEED_EVAL_GRAD: 94 ncomp = qfields[i].ncomp; 95 ierr = CeedBasisGetDimension(ofields[i].basis, &dim); CeedChk(ierr); 96 ierr = CeedMalloc(Q*ncomp*dim*blksize, &qdata_alloc[iq]); CeedChk(ierr); 97 qdata[i + starti] = qdata_alloc[iq]; 98 iq++; 99 break; 100 case CEED_EVAL_WEIGHT: // Only on input fields 101 ierr = CeedMalloc(Q*blksize, &qdata_alloc[iq]); CeedChk(ierr); 102 ierr = CeedBasisApply(ofields[iq].basis, blksize, CEED_NOTRANSPOSE, 103 CEED_EVAL_WEIGHT, NULL, qdata_alloc[iq]); CeedChk(ierr); 104 qdata[i] = qdata_alloc[iq]; 105 indata[i] = qdata[i]; 106 iq++; 107 break; 108 case CEED_EVAL_DIV: 109 break; // Not implimented 110 case CEED_EVAL_CURL: 111 break; // Not implimented 112 } 113 } 114 return 0; 115 } 116 117 /* 118 CeedOperator needs to connect all the named fields (be they active or passive) 119 to the named inputs and outputs of its CeedQFunction. 120 */ 121 static int CeedOperatorSetup_Blocked(CeedOperator op) { 122 int ierr; 123 bool setupdone; 124 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 125 if (setupdone) return 0; 126 CeedOperator_Blocked *impl; 127 ierr = CeedOperatorGetData(op, (void*)&impl); CeedChk(ierr); 128 CeedQFunction qf; 129 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 130 CeedInt Q, numinputfields, numoutputfields; 131 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 132 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 133 CeedChk(ierr); 134 135 // Count infield and outfield array sizes and evectors 136 impl->numein = numinputfields; 137 for (CeedInt i=0; i<numinputfields; i++) { 138 CeedEvalMode emode = qf->inputfields[i].emode; 139 impl->numqin += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD) + 140 !!(emode & CEED_EVAL_WEIGHT); 141 } 142 impl->numeout = numoutputfields; 143 for (CeedInt i=0; i<numoutputfields; i++) { 144 CeedEvalMode emode = qf->outputfields[i].emode; 145 impl->numqout += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD); 146 } 147 148 // Allocate 149 ierr = CeedCalloc(impl->numein + impl->numeout, &impl->blkrestr); 150 CeedChk(ierr); 151 ierr = CeedCalloc(impl->numein + impl->numeout, &impl->evecs); 152 CeedChk(ierr); 153 ierr = CeedCalloc(impl->numein + impl->numeout, &impl->edata); 154 CeedChk(ierr); 155 156 ierr = CeedCalloc(impl->numqin + impl->numqout, &impl->qdata_alloc); 157 CeedChk(ierr); 158 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->qdata); 159 CeedChk(ierr); 160 161 ierr = CeedCalloc(16, &impl->indata); CeedChk(ierr); 162 ierr = CeedCalloc(16, &impl->outdata); CeedChk(ierr); 163 // Set up infield and outfield pointer arrays 164 // Infields 165 ierr = CeedOperatorSetupFields_Blocked(qf->inputfields, op->inputfields, 166 impl->blkrestr, impl->evecs, 167 impl->qdata, impl->qdata_alloc, 168 impl->indata, 0, 169 0, numinputfields, Q); 170 CeedChk(ierr); 171 // Outfields 172 ierr = CeedOperatorSetupFields_Blocked(qf->outputfields, op->outputfields, 173 impl->blkrestr, impl->evecs, 174 impl->qdata, impl->qdata_alloc, 175 impl->indata, numinputfields, 176 impl->numqin, numoutputfields, Q); 177 CeedChk(ierr); 178 // Input Qvecs 179 for (CeedInt i=0; i<numinputfields; i++) { 180 CeedEvalMode emode = qf->inputfields[i].emode; 181 if ((emode != CEED_EVAL_NONE) && (emode != CEED_EVAL_WEIGHT)) 182 impl->indata[i] = impl->qdata[i]; 183 } 184 // Output Qvecs 185 for (CeedInt i=0; i<numoutputfields; i++) { 186 CeedEvalMode emode = qf->outputfields[i].emode; 187 if (emode != CEED_EVAL_NONE) 188 impl->outdata[i] = impl->qdata[i + numinputfields]; 189 } 190 191 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 192 193 return 0; 194 } 195 196 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 197 CeedVector outvec, CeedRequest *request) { 198 int ierr; 199 CeedOperator_Blocked *impl; 200 ierr = CeedOperatorGetData(op, (void*)&impl); CeedChk(ierr); 201 const CeedInt blksize = 8; 202 CeedInt Q, elemsize, numinputfields, numoutputfields, numelements; 203 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 204 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 205 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 206 CeedQFunction qf; 207 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 208 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 209 CeedChk(ierr); 210 CeedTransposeMode lmode = CEED_NOTRANSPOSE; 211 212 // Setup 213 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 214 215 // Input Evecs and Restriction 216 for (CeedInt i=0; i<numinputfields; i++) { 217 CeedEvalMode emode = qf->inputfields[i].emode; 218 if (emode == CEED_EVAL_WEIGHT) { // Skip 219 } else { 220 // Active 221 // Restrict 222 if (op->inputfields[i].vec == CEED_VECTOR_ACTIVE) { 223 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 224 lmode, invec, impl->evecs[i], 225 request); CeedChk(ierr); CeedChk(ierr); 226 } else { 227 // Passive 228 // Restrict 229 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 230 lmode, op->inputfields[i].vec, impl->evecs[i], 231 request); CeedChk(ierr); 232 } 233 // Get evec 234 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 235 (const CeedScalar **) &impl->edata[i]); 236 CeedChk(ierr); 237 } 238 } 239 240 // Output Evecs 241 for (CeedInt i=0; i<numoutputfields; i++) { 242 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 243 &impl->edata[i + numinputfields]); CeedChk(ierr); 244 } 245 246 // Loop through elements 247 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 248 // Input basis apply if needed 249 for (CeedInt i=0; i<numinputfields; i++) { 250 // Get elemsize, emode, ncomp 251 ierr = CeedElemRestrictionGetElementSize( 252 op->inputfields[i].Erestrict, &elemsize); CeedChk(ierr); 253 CeedEvalMode emode = qf->inputfields[i].emode; 254 CeedInt ncomp = qf->inputfields[i].ncomp; 255 // Basis action 256 switch(emode) { 257 case CEED_EVAL_NONE: 258 impl->indata[i] = &impl->edata[i][e*Q*ncomp]; 259 break; 260 case CEED_EVAL_INTERP: 261 ierr = CeedBasisApply(op->inputfields[i].basis, blksize, CEED_NOTRANSPOSE, 262 CEED_EVAL_INTERP, &impl->edata[i][e*elemsize*ncomp], impl->qdata[i]); 263 CeedChk(ierr); 264 break; 265 case CEED_EVAL_GRAD: 266 ierr = CeedBasisApply(op->inputfields[i].basis, blksize, CEED_NOTRANSPOSE, 267 CEED_EVAL_GRAD, &impl->edata[i][e*elemsize*ncomp], impl->qdata[i]); 268 CeedChk(ierr); 269 break; 270 case CEED_EVAL_WEIGHT: 271 break; // No action 272 case CEED_EVAL_DIV: 273 break; // Not implimented 274 case CEED_EVAL_CURL: 275 break; // Not implimented 276 } 277 } 278 279 // Output pointers 280 for (CeedInt i=0; i<numoutputfields; i++) { 281 CeedEvalMode emode = qf->outputfields[i].emode; 282 if (emode == CEED_EVAL_NONE) { 283 CeedInt ncomp = qf->outputfields[i].ncomp; 284 impl->outdata[i] = &impl->edata[i + numinputfields][e*Q*ncomp]; 285 } 286 } 287 // Q function 288 ierr = CeedQFunctionApply(qf, Q*blksize, 289 (const CeedScalar * const*) impl->indata, 290 impl->outdata); CeedChk(ierr); 291 292 // Output basis apply if needed 293 for (CeedInt i=0; i<numoutputfields; i++) { 294 // Get elemsize, emode, ncomp 295 ierr = CeedElemRestrictionGetElementSize( 296 op->outputfields[i].Erestrict, &elemsize); CeedChk(ierr); 297 CeedInt ncomp = qf->outputfields[i].ncomp; 298 CeedEvalMode emode = qf->outputfields[i].emode; 299 // Basis action 300 switch(emode) { 301 case CEED_EVAL_NONE: 302 break; // No action 303 case CEED_EVAL_INTERP: 304 ierr = CeedBasisApply(op->outputfields[i].basis, blksize, CEED_TRANSPOSE, 305 CEED_EVAL_INTERP, impl->outdata[i], 306 &impl->edata[i + numinputfields][e*elemsize*ncomp]); 307 CeedChk(ierr); 308 break; 309 case CEED_EVAL_GRAD: 310 ierr = CeedBasisApply(op->outputfields[i].basis, blksize, CEED_TRANSPOSE, 311 CEED_EVAL_GRAD, 312 impl->outdata[i], &impl->edata[i + numinputfields][e*elemsize*ncomp]); 313 CeedChk(ierr); 314 break; 315 case CEED_EVAL_WEIGHT: { 316 Ceed ceed; 317 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 318 return CeedError(ceed, 1, 319 "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); 320 break; // Should not occur 321 } 322 case CEED_EVAL_DIV: 323 break; // Not implimented 324 case CEED_EVAL_CURL: 325 break; // Not implimented 326 } 327 } 328 } 329 330 // Zero lvecs 331 ierr = CeedVectorSetValue(outvec, 0.0); CeedChk(ierr); 332 for (CeedInt i=0; i<numoutputfields; i++) 333 if (op->outputfields[i].vec != CEED_VECTOR_ACTIVE) { 334 ierr = CeedVectorSetValue(op->outputfields[i].vec, 0.0); CeedChk(ierr); 335 } 336 337 // Output restriction 338 for (CeedInt i=0; i<numoutputfields; i++) { 339 // Restore evec 340 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 341 &impl->edata[i + numinputfields]); CeedChk(ierr); 342 // Active 343 if (op->outputfields[i].vec == CEED_VECTOR_ACTIVE) { 344 // Restrict 345 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE, 346 lmode, impl->evecs[i+impl->numein], outvec, request); CeedChk(ierr); 347 } else { 348 // Passive 349 // Restrict 350 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE, 351 lmode, impl->evecs[i+impl->numein], op->outputfields[i].vec, 352 request); CeedChk(ierr); 353 } 354 } 355 356 // Restore input arrays 357 for (CeedInt i=0; i<numinputfields; i++) { 358 CeedEvalMode emode = qf->inputfields[i].emode; 359 if (emode == CEED_EVAL_WEIGHT) { // Skip 360 } else { 361 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 362 (const CeedScalar **) &impl->edata[i]); CeedChk(ierr); 363 } 364 } 365 366 return 0; 367 } 368 369 int CeedOperatorCreate_Blocked(CeedOperator op) { 370 int ierr; 371 CeedOperator_Blocked *impl; 372 373 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 374 op->data = impl; 375 op->Destroy = CeedOperatorDestroy_Blocked; 376 op->Apply = CeedOperatorApply_Blocked; 377 return 0; 378 } 379