1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-blocked.h" 18 #include "../ref/ceed-ref.h" 19 20 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 21 int ierr; 22 CeedOperator_Blocked *impl; 23 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 24 25 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 26 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 27 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 28 } 29 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 30 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 31 ierr = CeedFree(&impl->edata); CeedChk(ierr); 32 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 33 34 for (CeedInt i=0; i<impl->numein; i++) { 35 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 36 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 37 } 38 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 39 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 40 41 for (CeedInt i=0; i<impl->numeout; i++) { 42 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 43 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 44 } 45 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 46 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 47 48 ierr = CeedFree(&impl); CeedChk(ierr); 49 return 0; 50 } 51 52 /* 53 Setup infields or outfields 54 */ 55 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 56 CeedOperator op, bool inOrOut, 57 CeedElemRestriction *blkrestr, 58 CeedVector *fullevecs, CeedVector *evecs, 59 CeedVector *qvecs, CeedInt starte, 60 CeedInt numfields, CeedInt Q) { 61 CeedInt dim, ierr, ncomp, size, P; 62 Ceed ceed; 63 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 64 CeedBasis basis; 65 CeedElemRestriction r; 66 CeedOperatorField *opfields; 67 CeedQFunctionField *qffields; 68 if (inOrOut) { 69 ierr = CeedOperatorGetFields(op, NULL, &opfields); 70 CeedChk(ierr); 71 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 72 CeedChk(ierr); 73 } else { 74 ierr = CeedOperatorGetFields(op, &opfields, NULL); 75 CeedChk(ierr); 76 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 77 CeedChk(ierr); 78 } 79 const CeedInt blksize = 8; 80 81 // Loop over fields 82 for (CeedInt i=0; i<numfields; i++) { 83 CeedEvalMode emode; 84 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 85 86 if (emode != CEED_EVAL_WEIGHT) { 87 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 88 CeedChk(ierr); 89 CeedElemRestriction_Ref *data; 90 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 91 Ceed ceed; 92 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 93 CeedInt nelem, elemsize, nnodes; 94 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 95 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 96 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 97 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 98 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 99 blksize, nnodes, ncomp, 100 CEED_MEM_HOST, CEED_COPY_VALUES, 101 data->indices, &blkrestr[i+starte]); 102 CeedChk(ierr); 103 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 104 &fullevecs[i+starte]); 105 CeedChk(ierr); 106 } 107 108 switch(emode) { 109 case CEED_EVAL_NONE: 110 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 111 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 112 break; 113 case CEED_EVAL_INTERP: 114 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 115 ierr = CeedElemRestrictionGetElementSize(r, &P); 116 CeedChk(ierr); 117 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 118 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 119 break; 120 case CEED_EVAL_GRAD: 121 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 122 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 123 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 124 ierr = CeedElemRestrictionGetElementSize(r, &P); 125 CeedChk(ierr); 126 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 127 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 128 break; 129 case CEED_EVAL_WEIGHT: // Only on input fields 130 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 131 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 132 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 133 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 134 CeedChk(ierr); 135 136 break; 137 case CEED_EVAL_DIV: 138 break; // Not implemented 139 case CEED_EVAL_CURL: 140 break; // Not implemented 141 } 142 } 143 return 0; 144 } 145 146 /* 147 CeedOperator needs to connect all the named fields (be they active or passive) 148 to the named inputs and outputs of its CeedQFunction. 149 */ 150 static int CeedOperatorSetup_Blocked(CeedOperator op) { 151 int ierr; 152 bool setupdone; 153 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 154 if (setupdone) return 0; 155 Ceed ceed; 156 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 157 CeedOperator_Blocked *impl; 158 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 159 CeedQFunction qf; 160 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 161 CeedInt Q, numinputfields, numoutputfields; 162 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 163 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 164 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 165 CeedChk(ierr); 166 CeedOperatorField *opinputfields, *opoutputfields; 167 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 168 CeedChk(ierr); 169 CeedQFunctionField *qfinputfields, *qfoutputfields; 170 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 171 CeedChk(ierr); 172 173 // Allocate 174 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 175 CeedChk(ierr); 176 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 177 CeedChk(ierr); 178 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 179 CeedChk(ierr); 180 181 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 182 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 183 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 184 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 185 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 186 187 impl->numein = numinputfields; impl->numeout = numoutputfields; 188 189 // Set up infield and outfield pointer arrays 190 // Infields 191 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 192 impl->evecs, impl->evecsin, 193 impl->qvecsin, 0, 194 numinputfields, Q); 195 CeedChk(ierr); 196 // Outfields 197 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 198 impl->evecs, impl->evecsout, 199 impl->qvecsout, numinputfields, 200 numoutputfields, Q); 201 CeedChk(ierr); 202 203 // Identity QFunctions 204 if (impl->identityqf) { 205 CeedEvalMode inmode, outmode; 206 CeedQFunctionField *infields, *outfields; 207 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 208 209 for (CeedInt i=0; i<numinputfields; i++) { 210 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 211 CeedChk(ierr); 212 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 213 CeedChk(ierr); 214 215 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 216 impl->qvecsout[i] = impl->qvecsin[i]; 217 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 218 } 219 } 220 221 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 222 223 return 0; 224 } 225 226 // Setup Input fields 227 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 228 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 229 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 230 CeedRequest *request) { 231 CeedInt ierr; 232 CeedEvalMode emode; 233 CeedVector vec; 234 CeedTransposeMode lmode; 235 uint64_t state; 236 237 for (CeedInt i=0; i<numinputfields; i++) { 238 // Get input vector 239 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 240 if (vec == CEED_VECTOR_ACTIVE) { 241 if (skipactive) 242 continue; 243 else 244 vec = invec; 245 } 246 247 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 248 CeedChk(ierr); 249 if (emode == CEED_EVAL_WEIGHT) { // Skip 250 } else { 251 // Restrict 252 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 253 if (state != impl->inputstate[i] || vec == invec) { 254 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 255 CeedChk(ierr); 256 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 257 lmode, vec, impl->evecs[i], 258 request); CeedChk(ierr); CeedChk(ierr); 259 impl->inputstate[i] = state; 260 } 261 // Get evec 262 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 263 (const CeedScalar **) &impl->edata[i]); 264 CeedChk(ierr); 265 } 266 } 267 return 0; 268 } 269 270 // Input basis action 271 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 272 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 273 CeedInt numinputfields, CeedInt blksize, bool skipactive, 274 CeedOperator_Blocked *impl) { 275 CeedInt ierr; 276 CeedInt dim, elemsize, size; 277 CeedElemRestriction Erestrict; 278 CeedEvalMode emode; 279 CeedBasis basis; 280 281 for (CeedInt i=0; i<numinputfields; i++) { 282 // Skip active input 283 if (skipactive) { 284 CeedVector vec; 285 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 286 if (vec == CEED_VECTOR_ACTIVE) 287 continue; 288 } 289 290 // Get elemsize, emode, size 291 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 292 CeedChk(ierr); 293 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 294 CeedChk(ierr); 295 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 296 CeedChk(ierr); 297 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 298 // Basis action 299 switch(emode) { 300 case CEED_EVAL_NONE: 301 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 302 CEED_USE_POINTER, 303 &impl->edata[i][e*Q*size]); CeedChk(ierr); 304 break; 305 case CEED_EVAL_INTERP: 306 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 307 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 308 CEED_USE_POINTER, 309 &impl->edata[i][e*elemsize*size]); 310 CeedChk(ierr); 311 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 312 CEED_EVAL_INTERP, impl->evecsin[i], 313 impl->qvecsin[i]); CeedChk(ierr); 314 break; 315 case CEED_EVAL_GRAD: 316 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 317 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 318 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 319 CEED_USE_POINTER, 320 &impl->edata[i][e*elemsize*size/dim]); 321 CeedChk(ierr); 322 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 323 CEED_EVAL_GRAD, impl->evecsin[i], 324 impl->qvecsin[i]); CeedChk(ierr); 325 break; 326 case CEED_EVAL_WEIGHT: 327 break; // No action 328 case CEED_EVAL_DIV: 329 case CEED_EVAL_CURL: { 330 // LCOV_EXCL_START 331 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 332 CeedChk(ierr); 333 Ceed ceed; 334 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 335 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 336 // LCOV_EXCL_STOP 337 break; // Not implemented 338 } 339 } 340 } 341 return 0; 342 } 343 344 // Output basis action 345 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 346 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 347 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 348 CeedOperator op, CeedOperator_Blocked *impl) { 349 CeedInt ierr; 350 CeedInt dim, elemsize, size; 351 CeedElemRestriction Erestrict; 352 CeedEvalMode emode; 353 CeedBasis basis; 354 355 for (CeedInt i=0; i<numoutputfields; i++) { 356 // Get elemsize, emode, size 357 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 358 CeedChk(ierr); 359 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 360 CeedChk(ierr); 361 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 362 CeedChk(ierr); 363 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 364 // Basis action 365 switch(emode) { 366 case CEED_EVAL_NONE: 367 break; // No action 368 case CEED_EVAL_INTERP: 369 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 370 CeedChk(ierr); 371 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 372 CEED_USE_POINTER, 373 &impl->edata[i + numinputfields][e*elemsize*size]); 374 CeedChk(ierr); 375 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 376 CEED_EVAL_INTERP, impl->qvecsout[i], 377 impl->evecsout[i]); CeedChk(ierr); 378 break; 379 case CEED_EVAL_GRAD: 380 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 381 CeedChk(ierr); 382 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 383 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 384 CEED_USE_POINTER, 385 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 386 CeedChk(ierr); 387 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 388 CEED_EVAL_GRAD, impl->qvecsout[i], 389 impl->evecsout[i]); CeedChk(ierr); 390 break; 391 case CEED_EVAL_WEIGHT: { 392 // LCOV_EXCL_START 393 Ceed ceed; 394 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 395 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 396 "evaluation mode"); 397 // LCOV_EXCL_STOP 398 break; // Should not occur 399 } 400 case CEED_EVAL_DIV: 401 case CEED_EVAL_CURL: { 402 // LCOV_EXCL_START 403 Ceed ceed; 404 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 405 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 406 // LCOV_EXCL_STOP 407 break; // Not implemented 408 } 409 } 410 } 411 return 0; 412 } 413 414 // Restore Inputs 415 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 416 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 417 bool skipactive, CeedOperator_Blocked *impl) { 418 CeedInt ierr; 419 CeedEvalMode emode; 420 421 for (CeedInt i=0; i<numinputfields; i++) { 422 // Skip active inputs 423 if (skipactive) { 424 CeedVector vec; 425 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 426 if (vec == CEED_VECTOR_ACTIVE) 427 continue; 428 } 429 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 430 CeedChk(ierr); 431 if (emode == CEED_EVAL_WEIGHT) { // Skip 432 } else { 433 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 434 (const CeedScalar **) &impl->edata[i]); 435 CeedChk(ierr); 436 } 437 } 438 return 0; 439 } 440 441 // Apply Ceed Operator 442 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 443 CeedVector outvec, 444 CeedRequest *request) { 445 int ierr; 446 CeedOperator_Blocked *impl; 447 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 448 const CeedInt blksize = 8; 449 CeedInt Q, numinputfields, numoutputfields, numelements, size; 450 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 451 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 452 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 453 CeedQFunction qf; 454 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 455 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 456 CeedChk(ierr); 457 CeedTransposeMode lmode; 458 CeedOperatorField *opinputfields, *opoutputfields; 459 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 460 CeedChk(ierr); 461 CeedQFunctionField *qfinputfields, *qfoutputfields; 462 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 463 CeedChk(ierr); 464 CeedEvalMode emode; 465 CeedVector vec; 466 467 // Setup 468 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 469 470 // Input Evecs and Restriction 471 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 472 opinputfields, invec, false, impl, 473 request); CeedChk(ierr); 474 475 // Output Evecs 476 for (CeedInt i=0; i<numoutputfields; i++) { 477 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 478 &impl->edata[i + numinputfields]); CeedChk(ierr); 479 } 480 481 // Loop through elements 482 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 483 // Output pointers 484 for (CeedInt i=0; i<numoutputfields; i++) { 485 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 486 CeedChk(ierr); 487 if (emode == CEED_EVAL_NONE) { 488 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 489 CeedChk(ierr); 490 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 491 CEED_USE_POINTER, 492 &impl->edata[i + numinputfields][e*Q*size]); 493 CeedChk(ierr); 494 } 495 } 496 497 // Input basis apply 498 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 499 numinputfields, blksize, false, impl); 500 CeedChk(ierr); 501 502 // Q function 503 if (!impl->identityqf) { 504 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 505 CeedChk(ierr); 506 } 507 508 // Output basis apply 509 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 510 blksize, numinputfields, 511 numoutputfields, op, impl); 512 CeedChk(ierr); 513 } 514 515 // Zero lvecs 516 for (CeedInt i=0; i<numoutputfields; i++) { 517 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 518 if (vec == CEED_VECTOR_ACTIVE) { 519 if (!impl->add) { 520 vec = outvec; 521 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 522 } 523 } else { 524 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 525 } 526 } 527 impl->add = false; 528 529 // Output restriction 530 for (CeedInt i=0; i<numoutputfields; i++) { 531 // Restore evec 532 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 533 &impl->edata[i + numinputfields]); CeedChk(ierr); 534 // Get output vector 535 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 536 // Active 537 if (vec == CEED_VECTOR_ACTIVE) 538 vec = outvec; 539 // Restrict 540 ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr); 541 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE, 542 lmode, impl->evecs[i+impl->numein], vec, 543 request); CeedChk(ierr); 544 545 } 546 547 // Restore input arrays 548 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 549 opinputfields, false, impl); CeedChk(ierr); 550 551 return 0; 552 } 553 554 // Assemble Linear QFunction 555 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op, 556 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 557 int ierr; 558 CeedOperator_Blocked *impl; 559 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 560 const CeedInt blksize = 8; 561 CeedInt Q, numinputfields, numoutputfields, numelements, size; 562 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 563 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 564 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 565 CeedQFunction qf; 566 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 567 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 568 CeedChk(ierr); 569 CeedOperatorField *opinputfields, *opoutputfields; 570 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 571 CeedChk(ierr); 572 CeedQFunctionField *qfinputfields, *qfoutputfields; 573 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 574 CeedChk(ierr); 575 CeedVector vec, lvec; 576 CeedInt numactivein = 0, numactiveout = 0; 577 CeedVector *activein = NULL; 578 CeedScalar *a, *tmp; 579 Ceed ceed; 580 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 581 582 // Setup 583 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 584 585 // Check for identity 586 if (impl->identityqf) 587 // LCOV_EXCL_START 588 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 589 // LCOV_EXCL_STOP 590 591 // Input Evecs and Restriction 592 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 593 opinputfields, NULL, true, impl, 594 request); CeedChk(ierr); 595 596 // Count number of active input fields 597 for (CeedInt i=0; i<numinputfields; i++) { 598 // Get input vector 599 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 600 // Check if active input 601 if (vec == CEED_VECTOR_ACTIVE) { 602 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 603 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 604 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 605 CeedChk(ierr); 606 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 607 for (CeedInt field=0; field<size; field++) { 608 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 609 CeedChk(ierr); 610 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 611 CEED_USE_POINTER, &tmp[field*Q*blksize]); 612 CeedChk(ierr); 613 } 614 numactivein += size; 615 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 616 } 617 } 618 619 // Count number of active output fields 620 for (CeedInt i=0; i<numoutputfields; i++) { 621 // Get output vector 622 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 623 // Check if active output 624 if (vec == CEED_VECTOR_ACTIVE) { 625 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 626 numactiveout += size; 627 } 628 } 629 630 // Check sizes 631 if (!numactivein || !numactiveout) 632 // LCOV_EXCL_START 633 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 634 "and outputs"); 635 // LCOV_EXCL_STOP 636 637 // Setup lvec 638 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 639 &lvec); CeedChk(ierr); 640 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 641 642 // Create output restriction 643 ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q, 644 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 645 // Create assembled vector 646 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 647 assembled); CeedChk(ierr); 648 649 // Loop through elements 650 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 651 // Input basis apply 652 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 653 numinputfields, blksize, true, impl); 654 CeedChk(ierr); 655 656 // Assemble QFunction 657 for (CeedInt in=0; in<numactivein; in++) { 658 // Set Inputs 659 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 660 if (numactivein > 1) { 661 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 662 0.0); CeedChk(ierr); 663 } 664 // Set Outputs 665 for (CeedInt out=0; out<numoutputfields; out++) { 666 // Get output vector 667 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 668 CeedChk(ierr); 669 // Check if active output 670 if (vec == CEED_VECTOR_ACTIVE) { 671 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 672 CEED_USE_POINTER, a); CeedChk(ierr); 673 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 674 CeedChk(ierr); 675 a += size*Q*blksize; // Advance the pointer by the size of the output 676 } 677 } 678 // Apply QFunction 679 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 680 CeedChk(ierr); 681 } 682 } 683 684 // Un-set output Qvecs to prevent accidental overwrite of Assembled 685 for (CeedInt out=0; out<numoutputfields; out++) { 686 // Get output vector 687 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 688 CeedChk(ierr); 689 // Check if active output 690 if (vec == CEED_VECTOR_ACTIVE) { 691 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 692 NULL); CeedChk(ierr); 693 } 694 } 695 696 // Restore input arrays 697 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 698 opinputfields, true, impl); CeedChk(ierr); 699 700 // Output blocked restriction 701 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 702 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 703 CeedElemRestriction blkrstr; 704 ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize, 705 numelements*Q, 706 numactivein*numactiveout, 707 CEED_MEM_HOST, CEED_COPY_VALUES, 708 NULL, &blkrstr); CeedChk(ierr); 709 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE, 710 lvec, *assembled, request); CeedChk(ierr); 711 712 // Cleanup 713 for (CeedInt i=0; i<numactivein; i++) { 714 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 715 } 716 ierr = CeedFree(&activein); CeedChk(ierr); 717 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 718 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 719 720 return 0; 721 } 722 723 int CeedOperatorCreate_Blocked(CeedOperator op) { 724 int ierr; 725 Ceed ceed; 726 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 727 CeedOperator_Blocked *impl; 728 729 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 730 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 731 732 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 733 CeedOperatorAssembleLinearQFunction_Blocked); 734 CeedChk(ierr); 735 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 736 CeedOperatorApply_Blocked); CeedChk(ierr); 737 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 738 CeedOperatorDestroy_Blocked); CeedChk(ierr); 739 return 0; 740 } 741