1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-blocked.h" 18 #include "../ref/ceed-ref.h" 19 20 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 21 int ierr; 22 CeedOperator_Blocked *impl; 23 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 24 25 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 26 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 27 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 28 } 29 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 30 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 31 ierr = CeedFree(&impl->edata); CeedChk(ierr); 32 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 33 34 for (CeedInt i=0; i<impl->numein; i++) { 35 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 36 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 37 } 38 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 39 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 40 41 for (CeedInt i=0; i<impl->numeout; i++) { 42 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 43 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 44 } 45 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 46 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 47 48 ierr = CeedFree(&impl); CeedChk(ierr); 49 return 0; 50 } 51 52 /* 53 Setup infields or outfields 54 */ 55 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 56 CeedOperator op, bool inOrOut, 57 CeedElemRestriction *blkrestr, 58 CeedVector *fullevecs, CeedVector *evecs, 59 CeedVector *qvecs, CeedInt starte, 60 CeedInt numfields, CeedInt Q) { 61 CeedInt dim, ierr, ncomp, size, P; 62 Ceed ceed; 63 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 64 CeedBasis basis; 65 CeedElemRestriction r; 66 CeedOperatorField *opfields; 67 CeedQFunctionField *qffields; 68 if (inOrOut) { 69 ierr = CeedOperatorGetFields(op, NULL, &opfields); 70 CeedChk(ierr); 71 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 72 CeedChk(ierr); 73 } else { 74 ierr = CeedOperatorGetFields(op, &opfields, NULL); 75 CeedChk(ierr); 76 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 77 CeedChk(ierr); 78 } 79 const CeedInt blksize = 8; 80 81 // Loop over fields 82 for (CeedInt i=0; i<numfields; i++) { 83 CeedEvalMode emode; 84 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 85 86 if (emode != CEED_EVAL_WEIGHT) { 87 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 88 CeedChk(ierr); 89 CeedElemRestriction_Ref *data; 90 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 91 Ceed ceed; 92 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 93 CeedInt nelem, elemsize, nnodes; 94 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 95 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 96 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 97 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 98 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 99 blksize, nnodes, ncomp, 100 CEED_MEM_HOST, CEED_COPY_VALUES, 101 data->indices, &blkrestr[i+starte]); 102 CeedChk(ierr); 103 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 104 &fullevecs[i+starte]); 105 CeedChk(ierr); 106 } 107 108 switch(emode) { 109 case CEED_EVAL_NONE: 110 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 111 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 112 break; 113 case CEED_EVAL_INTERP: 114 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 115 ierr = CeedElemRestrictionGetElementSize(r, &P); 116 CeedChk(ierr); 117 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 118 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 119 break; 120 case CEED_EVAL_GRAD: 121 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 122 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 123 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 124 ierr = CeedElemRestrictionGetElementSize(r, &P); 125 CeedChk(ierr); 126 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 127 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 128 break; 129 case CEED_EVAL_WEIGHT: // Only on input fields 130 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 131 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 132 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 133 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 134 CeedChk(ierr); 135 136 break; 137 case CEED_EVAL_DIV: 138 break; // Not implemented 139 case CEED_EVAL_CURL: 140 break; // Not implemented 141 } 142 } 143 return 0; 144 } 145 146 /* 147 CeedOperator needs to connect all the named fields (be they active or passive) 148 to the named inputs and outputs of its CeedQFunction. 149 */ 150 static int CeedOperatorSetup_Blocked(CeedOperator op) { 151 int ierr; 152 bool setupdone; 153 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 154 if (setupdone) return 0; 155 Ceed ceed; 156 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 157 CeedOperator_Blocked *impl; 158 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 159 CeedQFunction qf; 160 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 161 CeedInt Q, numinputfields, numoutputfields; 162 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 163 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 164 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 165 CeedChk(ierr); 166 CeedOperatorField *opinputfields, *opoutputfields; 167 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 168 CeedChk(ierr); 169 CeedQFunctionField *qfinputfields, *qfoutputfields; 170 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 171 CeedChk(ierr); 172 173 // Allocate 174 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 175 CeedChk(ierr); 176 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 177 CeedChk(ierr); 178 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 179 CeedChk(ierr); 180 181 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 182 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 183 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 184 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 185 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 186 187 impl->numein = numinputfields; impl->numeout = numoutputfields; 188 189 // Set up infield and outfield pointer arrays 190 // Infields 191 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 192 impl->evecs, impl->evecsin, 193 impl->qvecsin, 0, 194 numinputfields, Q); 195 CeedChk(ierr); 196 // Outfields 197 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 198 impl->evecs, impl->evecsout, 199 impl->qvecsout, numinputfields, 200 numoutputfields, Q); 201 CeedChk(ierr); 202 203 // Identity QFunctions 204 if (impl->identityqf) { 205 CeedEvalMode inmode, outmode; 206 CeedQFunctionField *infields, *outfields; 207 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 208 209 for (CeedInt i=0; i<numinputfields; i++) { 210 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 211 CeedChk(ierr); 212 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 213 CeedChk(ierr); 214 215 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 216 impl->qvecsout[i] = impl->qvecsin[i]; 217 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 218 } 219 } 220 221 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 222 223 return 0; 224 } 225 226 // Setup Input fields 227 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 228 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 229 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 230 CeedRequest *request) { 231 CeedInt ierr; 232 CeedEvalMode emode; 233 CeedVector vec; 234 CeedTransposeMode lmode; 235 uint64_t state; 236 237 for (CeedInt i=0; i<numinputfields; i++) { 238 // Get input vector 239 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 240 if (vec == CEED_VECTOR_ACTIVE) { 241 if (skipactive) 242 continue; 243 else 244 vec = invec; 245 } 246 247 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 248 CeedChk(ierr); 249 if (emode == CEED_EVAL_WEIGHT) { // Skip 250 } else { 251 // Restrict 252 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 253 if (state != impl->inputstate[i] || vec == invec) { 254 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 255 CeedChk(ierr); 256 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 257 lmode, vec, impl->evecs[i], 258 request); CeedChk(ierr); CeedChk(ierr); 259 impl->inputstate[i] = state; 260 } 261 // Get evec 262 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 263 (const CeedScalar **) &impl->edata[i]); 264 CeedChk(ierr); 265 } 266 } 267 return 0; 268 } 269 270 // Input basis action 271 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 272 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 273 CeedInt numinputfields, CeedInt blksize, bool skipactive, 274 CeedOperator_Blocked *impl) { 275 CeedInt ierr; 276 CeedInt dim, elemsize, size; 277 CeedElemRestriction Erestrict; 278 CeedEvalMode emode; 279 CeedBasis basis; 280 281 for (CeedInt i=0; i<numinputfields; i++) { 282 // Skip active input 283 if (skipactive) { 284 CeedVector vec; 285 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 286 if (vec == CEED_VECTOR_ACTIVE) 287 continue; 288 } 289 290 // Get elemsize, emode, size 291 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 292 CeedChk(ierr); 293 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 294 CeedChk(ierr); 295 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 296 CeedChk(ierr); 297 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 298 // Basis action 299 switch(emode) { 300 case CEED_EVAL_NONE: 301 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 302 CEED_USE_POINTER, 303 &impl->edata[i][e*Q*size]); CeedChk(ierr); 304 break; 305 case CEED_EVAL_INTERP: 306 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 307 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 308 CEED_USE_POINTER, 309 &impl->edata[i][e*elemsize*size]); 310 CeedChk(ierr); 311 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 312 CEED_EVAL_INTERP, impl->evecsin[i], 313 impl->qvecsin[i]); CeedChk(ierr); 314 break; 315 case CEED_EVAL_GRAD: 316 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 317 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 318 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 319 CEED_USE_POINTER, 320 &impl->edata[i][e*elemsize*size/dim]); 321 CeedChk(ierr); 322 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 323 CEED_EVAL_GRAD, impl->evecsin[i], 324 impl->qvecsin[i]); CeedChk(ierr); 325 break; 326 case CEED_EVAL_WEIGHT: 327 break; // No action 328 case CEED_EVAL_DIV: 329 case CEED_EVAL_CURL: { 330 // LCOV_EXCL_START 331 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 332 CeedChk(ierr); 333 Ceed ceed; 334 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 335 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 336 // LCOV_EXCL_STOP 337 break; // Not implemented 338 } 339 } 340 } 341 return 0; 342 } 343 344 // Output basis action 345 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 346 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 347 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 348 CeedOperator op, CeedOperator_Blocked *impl) { 349 CeedInt ierr; 350 CeedInt dim, elemsize, size; 351 CeedElemRestriction Erestrict; 352 CeedEvalMode emode; 353 CeedBasis basis; 354 355 for (CeedInt i=0; i<numoutputfields; i++) { 356 // Get elemsize, emode, size 357 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 358 CeedChk(ierr); 359 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 360 CeedChk(ierr); 361 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 362 CeedChk(ierr); 363 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 364 // Basis action 365 switch(emode) { 366 case CEED_EVAL_NONE: 367 break; // No action 368 case CEED_EVAL_INTERP: 369 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 370 CeedChk(ierr); 371 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 372 CEED_USE_POINTER, 373 &impl->edata[i + numinputfields][e*elemsize*size]); 374 CeedChk(ierr); 375 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 376 CEED_EVAL_INTERP, impl->qvecsout[i], 377 impl->evecsout[i]); CeedChk(ierr); 378 break; 379 case CEED_EVAL_GRAD: 380 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 381 CeedChk(ierr); 382 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 383 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 384 CEED_USE_POINTER, 385 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 386 CeedChk(ierr); 387 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 388 CEED_EVAL_GRAD, impl->qvecsout[i], 389 impl->evecsout[i]); CeedChk(ierr); 390 break; 391 case CEED_EVAL_WEIGHT: { 392 // LCOV_EXCL_START 393 Ceed ceed; 394 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 395 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 396 "evaluation mode"); 397 // LCOV_EXCL_STOP 398 break; // Should not occur 399 } 400 case CEED_EVAL_DIV: 401 case CEED_EVAL_CURL: { 402 // LCOV_EXCL_START 403 Ceed ceed; 404 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 405 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 406 // LCOV_EXCL_STOP 407 break; // Not implemented 408 } 409 } 410 } 411 return 0; 412 } 413 414 // Restore Inputs 415 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 416 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 417 bool skipactive, CeedOperator_Blocked *impl) { 418 CeedInt ierr; 419 CeedEvalMode emode; 420 421 for (CeedInt i=0; i<numinputfields; i++) { 422 // Skip active inputs 423 if (skipactive) { 424 CeedVector vec; 425 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 426 if (vec == CEED_VECTOR_ACTIVE) 427 continue; 428 } 429 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 430 CeedChk(ierr); 431 if (emode == CEED_EVAL_WEIGHT) { // Skip 432 } else { 433 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 434 (const CeedScalar **) &impl->edata[i]); 435 CeedChk(ierr); 436 } 437 } 438 return 0; 439 } 440 441 // Apply Ceed Operator 442 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 443 CeedVector outvec, 444 CeedRequest *request) { 445 int ierr; 446 CeedOperator_Blocked *impl; 447 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 448 const CeedInt blksize = 8; 449 CeedInt Q, numinputfields, numoutputfields, numelements, size; 450 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 451 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 452 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 453 CeedQFunction qf; 454 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 455 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 456 CeedChk(ierr); 457 CeedTransposeMode lmode; 458 CeedOperatorField *opinputfields, *opoutputfields; 459 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 460 CeedChk(ierr); 461 CeedQFunctionField *qfinputfields, *qfoutputfields; 462 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 463 CeedChk(ierr); 464 CeedEvalMode emode; 465 CeedVector vec; 466 467 // Setup 468 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 469 470 // Input Evecs and Restriction 471 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 472 opinputfields, invec, false, impl, 473 request); CeedChk(ierr); 474 475 // Output Evecs 476 for (CeedInt i=0; i<numoutputfields; i++) { 477 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 478 &impl->edata[i + numinputfields]); CeedChk(ierr); 479 } 480 481 // Loop through elements 482 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 483 // Output pointers 484 for (CeedInt i=0; i<numoutputfields; i++) { 485 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 486 CeedChk(ierr); 487 if (emode == CEED_EVAL_NONE) { 488 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 489 CeedChk(ierr); 490 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 491 CEED_USE_POINTER, 492 &impl->edata[i + numinputfields][e*Q*size]); 493 CeedChk(ierr); 494 } 495 } 496 497 // Input basis apply 498 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 499 numinputfields, blksize, false, impl); 500 CeedChk(ierr); 501 502 // Q function 503 if (!impl->identityqf) { 504 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 505 CeedChk(ierr); 506 } 507 508 // Output basis apply 509 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 510 blksize, numinputfields, 511 numoutputfields, op, impl); 512 CeedChk(ierr); 513 } 514 515 // Output restriction 516 for (CeedInt i=0; i<numoutputfields; i++) { 517 // Restore evec 518 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 519 &impl->edata[i + numinputfields]); CeedChk(ierr); 520 // Get output vector 521 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 522 // Active 523 if (vec == CEED_VECTOR_ACTIVE) 524 vec = outvec; 525 // Restrict 526 ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr); 527 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE, 528 lmode, impl->evecs[i+impl->numein], vec, 529 request); CeedChk(ierr); 530 531 } 532 533 // Restore input arrays 534 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 535 opinputfields, false, impl); CeedChk(ierr); 536 537 return 0; 538 } 539 540 // Assemble Linear QFunction 541 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op, 542 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 543 int ierr; 544 CeedOperator_Blocked *impl; 545 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 546 const CeedInt blksize = 8; 547 CeedInt Q, numinputfields, numoutputfields, numelements, size; 548 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 549 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 550 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 551 CeedQFunction qf; 552 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 553 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 554 CeedChk(ierr); 555 CeedOperatorField *opinputfields, *opoutputfields; 556 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 557 CeedChk(ierr); 558 CeedQFunctionField *qfinputfields, *qfoutputfields; 559 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 560 CeedChk(ierr); 561 CeedVector vec, lvec; 562 CeedInt numactivein = 0, numactiveout = 0; 563 CeedVector *activein = NULL; 564 CeedScalar *a, *tmp; 565 Ceed ceed; 566 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 567 568 // Setup 569 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 570 571 // Check for identity 572 if (impl->identityqf) 573 // LCOV_EXCL_START 574 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 575 // LCOV_EXCL_STOP 576 577 // Input Evecs and Restriction 578 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 579 opinputfields, NULL, true, impl, 580 request); CeedChk(ierr); 581 582 // Count number of active input fields 583 for (CeedInt i=0; i<numinputfields; i++) { 584 // Get input vector 585 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 586 // Check if active input 587 if (vec == CEED_VECTOR_ACTIVE) { 588 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 589 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 590 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 591 CeedChk(ierr); 592 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 593 for (CeedInt field=0; field<size; field++) { 594 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 595 CeedChk(ierr); 596 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 597 CEED_USE_POINTER, &tmp[field*Q*blksize]); 598 CeedChk(ierr); 599 } 600 numactivein += size; 601 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 602 } 603 } 604 605 // Count number of active output fields 606 for (CeedInt i=0; i<numoutputfields; i++) { 607 // Get output vector 608 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 609 // Check if active output 610 if (vec == CEED_VECTOR_ACTIVE) { 611 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 612 numactiveout += size; 613 } 614 } 615 616 // Check sizes 617 if (!numactivein || !numactiveout) 618 // LCOV_EXCL_START 619 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 620 "and outputs"); 621 // LCOV_EXCL_STOP 622 623 // Setup lvec 624 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 625 &lvec); CeedChk(ierr); 626 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 627 628 // Create output restriction 629 ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q, 630 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 631 // Create assembled vector 632 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 633 assembled); CeedChk(ierr); 634 635 // Loop through elements 636 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 637 // Input basis apply 638 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 639 numinputfields, blksize, true, impl); 640 CeedChk(ierr); 641 642 // Assemble QFunction 643 for (CeedInt in=0; in<numactivein; in++) { 644 // Set Inputs 645 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 646 if (numactivein > 1) { 647 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 648 0.0); CeedChk(ierr); 649 } 650 // Set Outputs 651 for (CeedInt out=0; out<numoutputfields; out++) { 652 // Get output vector 653 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 654 CeedChk(ierr); 655 // Check if active output 656 if (vec == CEED_VECTOR_ACTIVE) { 657 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 658 CEED_USE_POINTER, a); CeedChk(ierr); 659 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 660 CeedChk(ierr); 661 a += size*Q*blksize; // Advance the pointer by the size of the output 662 } 663 } 664 // Apply QFunction 665 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 666 CeedChk(ierr); 667 } 668 } 669 670 // Un-set output Qvecs to prevent accidental overwrite of Assembled 671 for (CeedInt out=0; out<numoutputfields; out++) { 672 // Get output vector 673 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 674 CeedChk(ierr); 675 // Check if active output 676 if (vec == CEED_VECTOR_ACTIVE) { 677 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 678 NULL); CeedChk(ierr); 679 } 680 } 681 682 // Restore input arrays 683 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 684 opinputfields, true, impl); CeedChk(ierr); 685 686 // Output blocked restriction 687 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 688 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 689 CeedElemRestriction blkrstr; 690 ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize, 691 numelements*Q, 692 numactivein*numactiveout, 693 CEED_MEM_HOST, CEED_COPY_VALUES, 694 NULL, &blkrstr); CeedChk(ierr); 695 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE, 696 lvec, *assembled, request); CeedChk(ierr); 697 698 // Cleanup 699 for (CeedInt i=0; i<numactivein; i++) { 700 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 701 } 702 ierr = CeedFree(&activein); CeedChk(ierr); 703 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 704 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 705 706 return 0; 707 } 708 709 int CeedOperatorCreate_Blocked(CeedOperator op) { 710 int ierr; 711 Ceed ceed; 712 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 713 CeedOperator_Blocked *impl; 714 715 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 716 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 717 718 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 719 CeedOperatorAssembleLinearQFunction_Blocked); 720 CeedChk(ierr); 721 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 722 CeedOperatorApply_Blocked); CeedChk(ierr); 723 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 724 CeedOperatorDestroy_Blocked); CeedChk(ierr); 725 return 0; 726 } 727