1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-blocked.h" 18 #include "../ref/ceed-ref.h" 19 20 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 21 int ierr; 22 CeedOperator_Blocked *impl; 23 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 24 25 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 26 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 27 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 28 } 29 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 30 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 31 ierr = CeedFree(&impl->edata); CeedChk(ierr); 32 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 33 34 for (CeedInt i=0; i<impl->numein; i++) { 35 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 36 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 37 } 38 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 39 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 40 41 for (CeedInt i=0; i<impl->numeout; i++) { 42 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 43 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 44 } 45 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 46 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 47 48 ierr = CeedFree(&impl); CeedChk(ierr); 49 return 0; 50 } 51 52 /* 53 Setup infields or outfields 54 */ 55 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 56 CeedOperator op, bool inOrOut, 57 CeedElemRestriction *blkrestr, 58 CeedVector *fullevecs, CeedVector *evecs, 59 CeedVector *qvecs, CeedInt starte, 60 CeedInt numfields, CeedInt Q) { 61 CeedInt dim, ierr, ncomp, size, P; 62 Ceed ceed; 63 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 64 CeedBasis basis; 65 CeedElemRestriction r; 66 CeedOperatorField *opfields; 67 CeedQFunctionField *qffields; 68 if (inOrOut) { 69 ierr = CeedOperatorGetFields(op, NULL, &opfields); 70 CeedChk(ierr); 71 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 72 CeedChk(ierr); 73 } else { 74 ierr = CeedOperatorGetFields(op, &opfields, NULL); 75 CeedChk(ierr); 76 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 77 CeedChk(ierr); 78 } 79 const CeedInt blksize = 8; 80 81 // Loop over fields 82 for (CeedInt i=0; i<numfields; i++) { 83 CeedEvalMode emode; 84 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 85 86 if (emode != CEED_EVAL_WEIGHT) { 87 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 88 CeedChk(ierr); 89 CeedElemRestriction_Ref *data; 90 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 91 Ceed ceed; 92 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 93 CeedInt nelem, elemsize, nnodes; 94 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 95 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 96 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 97 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 98 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 99 blksize, nnodes, ncomp, 100 CEED_MEM_HOST, CEED_COPY_VALUES, 101 data->indices, &blkrestr[i+starte]); 102 CeedChk(ierr); 103 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 104 &fullevecs[i+starte]); 105 CeedChk(ierr); 106 } 107 108 switch(emode) { 109 case CEED_EVAL_NONE: 110 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 111 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 112 break; 113 case CEED_EVAL_INTERP: 114 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 115 ierr = CeedElemRestrictionGetElementSize(r, &P); 116 CeedChk(ierr); 117 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 118 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 119 break; 120 case CEED_EVAL_GRAD: 121 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 122 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 123 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 124 ierr = CeedElemRestrictionGetElementSize(r, &P); 125 CeedChk(ierr); 126 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 127 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 128 break; 129 case CEED_EVAL_WEIGHT: // Only on input fields 130 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 131 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 132 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 133 CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr); 134 135 break; 136 case CEED_EVAL_DIV: 137 break; // Not implemented 138 case CEED_EVAL_CURL: 139 break; // Not implemented 140 } 141 } 142 return 0; 143 } 144 145 /* 146 CeedOperator needs to connect all the named fields (be they active or passive) 147 to the named inputs and outputs of its CeedQFunction. 148 */ 149 static int CeedOperatorSetup_Blocked(CeedOperator op) { 150 int ierr; 151 bool setupdone; 152 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 153 if (setupdone) return 0; 154 Ceed ceed; 155 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 156 CeedOperator_Blocked *impl; 157 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 158 CeedQFunction qf; 159 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 160 CeedInt Q, numinputfields, numoutputfields; 161 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 162 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 163 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 164 CeedChk(ierr); 165 CeedOperatorField *opinputfields, *opoutputfields; 166 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 167 CeedChk(ierr); 168 CeedQFunctionField *qfinputfields, *qfoutputfields; 169 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 170 CeedChk(ierr); 171 172 // Allocate 173 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 174 CeedChk(ierr); 175 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 176 CeedChk(ierr); 177 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 178 CeedChk(ierr); 179 180 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 181 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 182 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 183 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 184 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 185 186 impl->numein = numinputfields; impl->numeout = numoutputfields; 187 188 // Set up infield and outfield pointer arrays 189 // Infields 190 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 191 impl->evecs, impl->evecsin, 192 impl->qvecsin, 0, 193 numinputfields, Q); 194 CeedChk(ierr); 195 // Outfields 196 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 197 impl->evecs, impl->evecsout, 198 impl->qvecsout, numinputfields, 199 numoutputfields, Q); 200 CeedChk(ierr); 201 202 // Identity QFunctions 203 if (impl->identityqf) { 204 CeedEvalMode inmode, outmode; 205 CeedQFunctionField *infields, *outfields; 206 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 207 208 for (CeedInt i=0; i<numinputfields; i++) { 209 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 210 CeedChk(ierr); 211 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 212 CeedChk(ierr); 213 214 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 215 impl->qvecsout[i] = impl->qvecsin[i]; 216 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 217 } 218 } 219 220 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 221 222 return 0; 223 } 224 225 // Setup Input fields 226 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 227 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 228 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 229 CeedRequest *request) { 230 CeedInt ierr; 231 CeedEvalMode emode; 232 CeedVector vec; 233 CeedTransposeMode lmode; 234 uint64_t state; 235 236 for (CeedInt i=0; i<numinputfields; i++) { 237 // Get input vector 238 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 239 if (vec == CEED_VECTOR_ACTIVE) { 240 if (skipactive) 241 continue; 242 else 243 vec = invec; 244 } 245 246 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 247 CeedChk(ierr); 248 if (emode == CEED_EVAL_WEIGHT) { // Skip 249 } else { 250 // Restrict 251 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 252 if (state != impl->inputstate[i] || vec == invec) { 253 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 254 CeedChk(ierr); 255 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 256 lmode, vec, impl->evecs[i], 257 request); CeedChk(ierr); CeedChk(ierr); 258 impl->inputstate[i] = state; 259 } 260 // Get evec 261 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 262 (const CeedScalar **) &impl->edata[i]); 263 CeedChk(ierr); 264 } 265 } 266 return 0; 267 } 268 269 // Input basis action 270 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 271 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 272 CeedInt numinputfields, CeedInt blksize, bool skipactive, 273 CeedOperator_Blocked *impl) { 274 CeedInt ierr; 275 CeedInt dim, elemsize, size; 276 CeedElemRestriction Erestrict; 277 CeedEvalMode emode; 278 CeedBasis basis; 279 280 for (CeedInt i=0; i<numinputfields; i++) { 281 // Skip active input 282 if (skipactive) { 283 CeedVector vec; 284 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 285 if (vec == CEED_VECTOR_ACTIVE) 286 continue; 287 } 288 289 // Get elemsize, emode, size 290 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 291 CeedChk(ierr); 292 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 293 CeedChk(ierr); 294 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 295 CeedChk(ierr); 296 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 297 // Basis action 298 switch(emode) { 299 case CEED_EVAL_NONE: 300 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 301 CEED_USE_POINTER, 302 &impl->edata[i][e*Q*size]); CeedChk(ierr); 303 break; 304 case CEED_EVAL_INTERP: 305 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 306 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 307 CEED_USE_POINTER, 308 &impl->edata[i][e*elemsize*size]); 309 CeedChk(ierr); 310 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 311 CEED_EVAL_INTERP, impl->evecsin[i], 312 impl->qvecsin[i]); CeedChk(ierr); 313 break; 314 case CEED_EVAL_GRAD: 315 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 316 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 317 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 318 CEED_USE_POINTER, 319 &impl->edata[i][e*elemsize*size/dim]); 320 CeedChk(ierr); 321 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 322 CEED_EVAL_GRAD, impl->evecsin[i], 323 impl->qvecsin[i]); CeedChk(ierr); 324 break; 325 case CEED_EVAL_WEIGHT: 326 break; // No action 327 case CEED_EVAL_DIV: 328 case CEED_EVAL_CURL: { 329 // LCOV_EXCL_START 330 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 331 CeedChk(ierr); 332 Ceed ceed; 333 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 334 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 335 // LCOV_EXCL_STOP 336 break; // Not implemented 337 } 338 } 339 } 340 return 0; 341 } 342 343 // Output basis action 344 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 345 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 346 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 347 CeedOperator op, CeedOperator_Blocked *impl) { 348 CeedInt ierr; 349 CeedInt dim, elemsize, size; 350 CeedElemRestriction Erestrict; 351 CeedEvalMode emode; 352 CeedBasis basis; 353 354 for (CeedInt i=0; i<numoutputfields; i++) { 355 // Get elemsize, emode, size 356 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 357 CeedChk(ierr); 358 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 359 CeedChk(ierr); 360 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 361 CeedChk(ierr); 362 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 363 // Basis action 364 switch(emode) { 365 case CEED_EVAL_NONE: 366 break; // No action 367 case CEED_EVAL_INTERP: 368 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 369 CeedChk(ierr); 370 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 371 CEED_USE_POINTER, 372 &impl->edata[i + numinputfields][e*elemsize*size]); 373 CeedChk(ierr); 374 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 375 CEED_EVAL_INTERP, impl->qvecsout[i], 376 impl->evecsout[i]); CeedChk(ierr); 377 break; 378 case CEED_EVAL_GRAD: 379 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 380 CeedChk(ierr); 381 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 382 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 383 CEED_USE_POINTER, 384 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 385 CeedChk(ierr); 386 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 387 CEED_EVAL_GRAD, impl->qvecsout[i], 388 impl->evecsout[i]); CeedChk(ierr); 389 break; 390 case CEED_EVAL_WEIGHT: { 391 // LCOV_EXCL_START 392 Ceed ceed; 393 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 394 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 395 "evaluation mode"); 396 // LCOV_EXCL_STOP 397 break; // Should not occur 398 } 399 case CEED_EVAL_DIV: 400 case CEED_EVAL_CURL: { 401 // LCOV_EXCL_START 402 Ceed ceed; 403 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 404 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 405 // LCOV_EXCL_STOP 406 break; // Not implemented 407 } 408 } 409 } 410 return 0; 411 } 412 413 // Restore Inputs 414 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 415 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 416 bool skipactive, CeedOperator_Blocked *impl) { 417 CeedInt ierr; 418 CeedEvalMode emode; 419 420 for (CeedInt i=0; i<numinputfields; i++) { 421 // Skip active inputs 422 if (skipactive) { 423 CeedVector vec; 424 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 425 if (vec == CEED_VECTOR_ACTIVE) 426 continue; 427 } 428 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 429 CeedChk(ierr); 430 if (emode == CEED_EVAL_WEIGHT) { // Skip 431 } else { 432 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 433 (const CeedScalar **) &impl->edata[i]); 434 CeedChk(ierr); 435 } 436 } 437 return 0; 438 } 439 440 // Apply Ceed Operator 441 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 442 CeedVector outvec, 443 CeedRequest *request) { 444 int ierr; 445 CeedOperator_Blocked *impl; 446 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 447 const CeedInt blksize = 8; 448 CeedInt Q, numinputfields, numoutputfields, numelements, size; 449 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 450 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 451 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 452 CeedQFunction qf; 453 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 454 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 455 CeedChk(ierr); 456 CeedTransposeMode lmode; 457 CeedOperatorField *opinputfields, *opoutputfields; 458 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 459 CeedChk(ierr); 460 CeedQFunctionField *qfinputfields, *qfoutputfields; 461 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 462 CeedChk(ierr); 463 CeedEvalMode emode; 464 CeedVector vec; 465 466 // Setup 467 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 468 469 // Input Evecs and Restriction 470 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 471 opinputfields, invec, false, impl, 472 request); CeedChk(ierr); 473 474 // Output Evecs 475 for (CeedInt i=0; i<numoutputfields; i++) { 476 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 477 &impl->edata[i + numinputfields]); CeedChk(ierr); 478 } 479 480 // Loop through elements 481 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 482 // Output pointers 483 for (CeedInt i=0; i<numoutputfields; i++) { 484 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 485 CeedChk(ierr); 486 if (emode == CEED_EVAL_NONE) { 487 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 488 CeedChk(ierr); 489 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 490 CEED_USE_POINTER, 491 &impl->edata[i + numinputfields][e*Q*size]); 492 CeedChk(ierr); 493 } 494 } 495 496 // Input basis apply 497 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 498 numinputfields, blksize, false, impl); 499 CeedChk(ierr); 500 501 // Q function 502 if (!impl->identityqf) { 503 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 504 CeedChk(ierr); 505 } 506 507 // Output basis apply 508 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 509 blksize, numinputfields, 510 numoutputfields, op, impl); 511 CeedChk(ierr); 512 } 513 514 // Zero lvecs 515 for (CeedInt i=0; i<numoutputfields; i++) { 516 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 517 if (vec == CEED_VECTOR_ACTIVE) { 518 if (!impl->add) { 519 vec = outvec; 520 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 521 } 522 } else { 523 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 524 } 525 } 526 impl->add = false; 527 528 // Output restriction 529 for (CeedInt i=0; i<numoutputfields; i++) { 530 // Restore evec 531 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 532 &impl->edata[i + numinputfields]); CeedChk(ierr); 533 // Get output vector 534 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 535 // Active 536 if (vec == CEED_VECTOR_ACTIVE) 537 vec = outvec; 538 // Restrict 539 ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr); 540 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE, 541 lmode, impl->evecs[i+impl->numein], vec, 542 request); CeedChk(ierr); 543 544 } 545 546 // Restore input arrays 547 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 548 opinputfields, false, impl); CeedChk(ierr); 549 550 return 0; 551 } 552 553 // Assemble Linear QFunction 554 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op, 555 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 556 int ierr; 557 CeedOperator_Blocked *impl; 558 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 559 const CeedInt blksize = 8; 560 CeedInt Q, numinputfields, numoutputfields, numelements, size; 561 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 562 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 563 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 564 CeedQFunction qf; 565 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 566 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 567 CeedChk(ierr); 568 CeedOperatorField *opinputfields, *opoutputfields; 569 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 570 CeedChk(ierr); 571 CeedQFunctionField *qfinputfields, *qfoutputfields; 572 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 573 CeedChk(ierr); 574 CeedVector vec, lvec; 575 CeedInt numactivein = 0, numactiveout = 0; 576 CeedVector *activein = NULL; 577 CeedScalar *a, *tmp; 578 Ceed ceed; 579 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 580 581 // Setup 582 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 583 584 // Check for identity 585 if (impl->identityqf) 586 // LCOV_EXCL_START 587 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 588 // LCOV_EXCL_STOP 589 590 // Input Evecs and Restriction 591 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 592 opinputfields, NULL, true, impl, 593 request); CeedChk(ierr); 594 595 // Count number of active input fields 596 for (CeedInt i=0; i<numinputfields; i++) { 597 // Get input vector 598 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 599 // Check if active input 600 if (vec == CEED_VECTOR_ACTIVE) { 601 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 602 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 603 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 604 CeedChk(ierr); 605 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 606 for (CeedInt field=0; field<size; field++) { 607 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 608 CeedChk(ierr); 609 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 610 CEED_USE_POINTER, &tmp[field*Q*blksize]); 611 CeedChk(ierr); 612 } 613 numactivein += size; 614 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 615 } 616 } 617 618 // Count number of active output fields 619 for (CeedInt i=0; i<numoutputfields; i++) { 620 // Get output vector 621 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 622 // Check if active output 623 if (vec == CEED_VECTOR_ACTIVE) { 624 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 625 numactiveout += size; 626 } 627 } 628 629 // Check sizes 630 if (!numactivein || !numactiveout) 631 // LCOV_EXCL_START 632 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 633 "and outputs"); 634 // LCOV_EXCL_STOP 635 636 // Setup lvec 637 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 638 &lvec); CeedChk(ierr); 639 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 640 641 // Create output restriction 642 ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q, 643 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 644 // Create assembled vector 645 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 646 assembled); CeedChk(ierr); 647 648 // Loop through elements 649 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 650 // Input basis apply 651 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 652 numinputfields, blksize, true, impl); 653 CeedChk(ierr); 654 655 // Assemble QFunction 656 for (CeedInt in=0; in<numactivein; in++) { 657 // Set Inputs 658 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 659 if (numactivein > 1) { 660 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 661 0.0); CeedChk(ierr); 662 } 663 // Set Outputs 664 for (CeedInt out=0; out<numoutputfields; out++) { 665 // Get output vector 666 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 667 CeedChk(ierr); 668 // Check if active output 669 if (vec == CEED_VECTOR_ACTIVE) { 670 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 671 CEED_USE_POINTER, a); CeedChk(ierr); 672 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 673 CeedChk(ierr); 674 a += size*Q*blksize; // Advance the pointer by the size of the output 675 } 676 } 677 // Apply QFunction 678 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 679 CeedChk(ierr); 680 } 681 } 682 683 // Un-set output Qvecs to prevent accidental overwrite of Assembled 684 for (CeedInt out=0; out<numoutputfields; out++) { 685 // Get output vector 686 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 687 CeedChk(ierr); 688 // Check if active output 689 if (vec == CEED_VECTOR_ACTIVE) { 690 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 691 NULL); CeedChk(ierr); 692 } 693 } 694 695 // Restore input arrays 696 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 697 opinputfields, true, impl); CeedChk(ierr); 698 699 // Output blocked restriction 700 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 701 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 702 CeedElemRestriction blkrstr; 703 ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize, 704 numelements*Q, 705 numactivein*numactiveout, 706 CEED_MEM_HOST, CEED_COPY_VALUES, 707 NULL, &blkrstr); CeedChk(ierr); 708 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE, 709 lvec, *assembled, request); CeedChk(ierr); 710 711 // Cleanup 712 for (CeedInt i=0; i<numactivein; i++) { 713 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 714 } 715 ierr = CeedFree(&activein); CeedChk(ierr); 716 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 717 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 718 719 return 0; 720 } 721 722 int CeedOperatorCreate_Blocked(CeedOperator op) { 723 int ierr; 724 Ceed ceed; 725 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 726 CeedOperator_Blocked *impl; 727 728 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 729 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 730 731 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 732 CeedOperatorAssembleLinearQFunction_Blocked); 733 CeedChk(ierr); 734 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 735 CeedOperatorApply_Blocked); CeedChk(ierr); 736 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 737 CeedOperatorDestroy_Blocked); CeedChk(ierr); 738 return 0; 739 } 740