1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-blocked.h" 18 #include "../ref/ceed-ref.h" 19 20 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 21 int ierr; 22 CeedOperator_Blocked *impl; 23 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 24 25 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 26 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 27 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 28 } 29 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 30 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 31 ierr = CeedFree(&impl->edata); CeedChk(ierr); 32 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 33 34 for (CeedInt i=0; i<impl->numein; i++) { 35 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 36 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 37 } 38 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 39 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 40 41 for (CeedInt i=0; i<impl->numeout; i++) { 42 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 43 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 44 } 45 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 46 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 47 48 ierr = CeedFree(&impl); CeedChk(ierr); 49 return 0; 50 } 51 52 /* 53 Setup infields or outfields 54 */ 55 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 56 CeedOperator op, bool inOrOut, 57 CeedElemRestriction *blkrestr, 58 CeedVector *fullevecs, CeedVector *evecs, 59 CeedVector *qvecs, CeedInt starte, 60 CeedInt numfields, CeedInt Q) { 61 CeedInt dim, ierr, ncomp, size, P; 62 Ceed ceed; 63 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 64 CeedBasis basis; 65 CeedElemRestriction r; 66 CeedOperatorField *opfields; 67 CeedQFunctionField *qffields; 68 if (inOrOut) { 69 ierr = CeedOperatorGetFields(op, NULL, &opfields); 70 CeedChk(ierr); 71 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 72 CeedChk(ierr); 73 } else { 74 ierr = CeedOperatorGetFields(op, &opfields, NULL); 75 CeedChk(ierr); 76 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 77 CeedChk(ierr); 78 } 79 const CeedInt blksize = 8; 80 81 // Loop over fields 82 for (CeedInt i=0; i<numfields; i++) { 83 CeedEvalMode emode; 84 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 85 86 if (emode != CEED_EVAL_WEIGHT) { 87 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 88 CeedChk(ierr); 89 CeedElemRestriction_Ref *data; 90 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 91 Ceed ceed; 92 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 93 CeedInt nelem, elemsize, nnodes; 94 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 95 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 96 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 97 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 98 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 99 blksize, nnodes, ncomp, 100 CEED_MEM_HOST, CEED_COPY_VALUES, 101 data->indices, &blkrestr[i+starte]); 102 CeedChk(ierr); 103 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 104 &fullevecs[i+starte]); 105 CeedChk(ierr); 106 } 107 108 switch(emode) { 109 case CEED_EVAL_NONE: 110 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 111 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 112 break; 113 case CEED_EVAL_INTERP: 114 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 115 ierr = CeedElemRestrictionGetElementSize(r, &P); 116 CeedChk(ierr); 117 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 118 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 119 break; 120 case CEED_EVAL_GRAD: 121 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 122 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 123 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 124 ierr = CeedElemRestrictionGetElementSize(r, &P); 125 CeedChk(ierr); 126 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 127 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 128 break; 129 case CEED_EVAL_WEIGHT: // Only on input fields 130 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 131 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 132 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 133 CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr); 134 135 break; 136 case CEED_EVAL_DIV: 137 break; // Not implemented 138 case CEED_EVAL_CURL: 139 break; // Not implemented 140 } 141 } 142 return 0; 143 } 144 145 /* 146 CeedOperator needs to connect all the named fields (be they active or passive) 147 to the named inputs and outputs of its CeedQFunction. 148 */ 149 static int CeedOperatorSetup_Blocked(CeedOperator op) { 150 int ierr; 151 bool setupdone; 152 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 153 if (setupdone) return 0; 154 Ceed ceed; 155 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 156 CeedOperator_Blocked *impl; 157 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 158 CeedQFunction qf; 159 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 160 CeedInt Q, numinputfields, numoutputfields; 161 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 162 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 163 CeedChk(ierr); 164 CeedOperatorField *opinputfields, *opoutputfields; 165 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 166 CeedChk(ierr); 167 CeedQFunctionField *qfinputfields, *qfoutputfields; 168 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 169 CeedChk(ierr); 170 171 // Allocate 172 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 173 CeedChk(ierr); 174 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 175 CeedChk(ierr); 176 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 177 CeedChk(ierr); 178 179 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 180 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 181 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 182 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 183 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 184 185 impl->numein = numinputfields; impl->numeout = numoutputfields; 186 187 // Set up infield and outfield pointer arrays 188 // Infields 189 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 190 impl->evecs, impl->evecsin, 191 impl->qvecsin, 0, 192 numinputfields, Q); 193 CeedChk(ierr); 194 // Outfields 195 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 196 impl->evecs, impl->evecsout, 197 impl->qvecsout, numinputfields, 198 numoutputfields, Q); 199 CeedChk(ierr); 200 201 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 202 203 return 0; 204 } 205 206 207 // Setup Input fields 208 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 209 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 210 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 211 CeedRequest *request) { 212 CeedInt ierr; 213 CeedEvalMode emode; 214 CeedVector vec; 215 CeedTransposeMode lmode; 216 uint64_t state; 217 218 for (CeedInt i=0; i<numinputfields; i++) { 219 // Get input vector 220 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 221 if (vec == CEED_VECTOR_ACTIVE) { 222 if (skipactive) 223 continue; 224 else 225 vec = invec; 226 } 227 228 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 229 CeedChk(ierr); 230 if (emode == CEED_EVAL_WEIGHT) { // Skip 231 } else { 232 // Restrict 233 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 234 if (state != impl->inputstate[i] || vec == invec) { 235 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 236 CeedChk(ierr); 237 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 238 lmode, vec, impl->evecs[i], 239 request); CeedChk(ierr); CeedChk(ierr); 240 impl->inputstate[i] = state; 241 } 242 // Get evec 243 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 244 (const CeedScalar **) &impl->edata[i]); 245 CeedChk(ierr); 246 } 247 } 248 return 0; 249 } 250 251 // Input basis action 252 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 253 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 254 CeedInt numinputfields, CeedInt blksize, bool skipactive, 255 CeedOperator_Blocked *impl) { 256 CeedInt ierr; 257 CeedInt dim, elemsize, size; 258 CeedElemRestriction Erestrict; 259 CeedEvalMode emode; 260 CeedBasis basis; 261 262 for (CeedInt i=0; i<numinputfields; i++) { 263 // Skip active input 264 if (skipactive) { 265 CeedVector vec; 266 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 267 if (vec == CEED_VECTOR_ACTIVE) 268 continue; 269 } 270 271 // Get elemsize, emode, size 272 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 273 CeedChk(ierr); 274 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 275 CeedChk(ierr); 276 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 277 CeedChk(ierr); 278 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 279 // Basis action 280 switch(emode) { 281 case CEED_EVAL_NONE: 282 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 283 CEED_USE_POINTER, 284 &impl->edata[i][e*Q*size]); CeedChk(ierr); 285 break; 286 case CEED_EVAL_INTERP: 287 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 288 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 289 CEED_USE_POINTER, 290 &impl->edata[i][e*elemsize*size]); 291 CeedChk(ierr); 292 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 293 CEED_EVAL_INTERP, impl->evecsin[i], 294 impl->qvecsin[i]); CeedChk(ierr); 295 break; 296 case CEED_EVAL_GRAD: 297 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 298 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 299 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 300 CEED_USE_POINTER, 301 &impl->edata[i][e*elemsize*size/dim]); 302 CeedChk(ierr); 303 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 304 CEED_EVAL_GRAD, impl->evecsin[i], 305 impl->qvecsin[i]); CeedChk(ierr); 306 break; 307 case CEED_EVAL_WEIGHT: 308 break; // No action 309 case CEED_EVAL_DIV: 310 case CEED_EVAL_CURL: { 311 // LCOV_EXCL_START 312 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 313 CeedChk(ierr); 314 Ceed ceed; 315 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 316 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 317 // LCOV_EXCL_STOP 318 break; // Not implemented 319 } 320 } 321 } 322 return 0; 323 } 324 325 // Output basis action 326 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 327 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 328 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 329 CeedOperator op, CeedOperator_Blocked *impl) { 330 CeedInt ierr; 331 CeedInt dim, elemsize, size; 332 CeedElemRestriction Erestrict; 333 CeedEvalMode emode; 334 CeedBasis basis; 335 336 for (CeedInt i=0; i<numoutputfields; i++) { 337 // Get elemsize, emode, size 338 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 339 CeedChk(ierr); 340 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 341 CeedChk(ierr); 342 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 343 CeedChk(ierr); 344 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 345 // Basis action 346 switch(emode) { 347 case CEED_EVAL_NONE: 348 break; // No action 349 case CEED_EVAL_INTERP: 350 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 351 CeedChk(ierr); 352 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 353 CEED_USE_POINTER, 354 &impl->edata[i + numinputfields][e*elemsize*size]); 355 CeedChk(ierr); 356 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 357 CEED_EVAL_INTERP, impl->qvecsout[i], 358 impl->evecsout[i]); CeedChk(ierr); 359 break; 360 case CEED_EVAL_GRAD: 361 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 362 CeedChk(ierr); 363 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 364 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 365 CEED_USE_POINTER, 366 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 367 CeedChk(ierr); 368 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 369 CEED_EVAL_GRAD, impl->qvecsout[i], 370 impl->evecsout[i]); CeedChk(ierr); 371 break; 372 case CEED_EVAL_WEIGHT: { 373 // LCOV_EXCL_START 374 Ceed ceed; 375 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 376 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 377 "evaluation mode"); 378 // LCOV_EXCL_STOP 379 break; // Should not occur 380 } 381 case CEED_EVAL_DIV: 382 case CEED_EVAL_CURL: { 383 // LCOV_EXCL_START 384 Ceed ceed; 385 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 386 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 387 // LCOV_EXCL_STOP 388 break; // Not implemented 389 } 390 } 391 } 392 return 0; 393 } 394 395 // Restore Inputs 396 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 397 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 398 bool skipactive, CeedOperator_Blocked *impl) { 399 CeedInt ierr; 400 CeedEvalMode emode; 401 402 for (CeedInt i=0; i<numinputfields; i++) { 403 // Skip active inputs 404 if (skipactive) { 405 CeedVector vec; 406 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 407 if (vec == CEED_VECTOR_ACTIVE) 408 continue; 409 } 410 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 411 CeedChk(ierr); 412 if (emode == CEED_EVAL_WEIGHT) { // Skip 413 } else { 414 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 415 (const CeedScalar **) &impl->edata[i]); 416 CeedChk(ierr); 417 } 418 } 419 return 0; 420 } 421 422 // Apply Ceed Operator 423 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 424 CeedVector outvec, 425 CeedRequest *request) { 426 int ierr; 427 CeedOperator_Blocked *impl; 428 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 429 const CeedInt blksize = 8; 430 CeedInt Q, numinputfields, numoutputfields, numelements, size; 431 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 432 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 433 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 434 CeedQFunction qf; 435 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 436 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 437 CeedChk(ierr); 438 CeedTransposeMode lmode; 439 CeedOperatorField *opinputfields, *opoutputfields; 440 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 441 CeedChk(ierr); 442 CeedQFunctionField *qfinputfields, *qfoutputfields; 443 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 444 CeedChk(ierr); 445 CeedEvalMode emode; 446 CeedVector vec; 447 448 // Setup 449 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 450 451 // Input Evecs and Restriction 452 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 453 opinputfields, invec, false, impl, 454 request); CeedChk(ierr); 455 456 // Output Evecs 457 for (CeedInt i=0; i<numoutputfields; i++) { 458 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 459 &impl->edata[i + numinputfields]); CeedChk(ierr); 460 } 461 462 // Loop through elements 463 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 464 // Input basis apply 465 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 466 numinputfields, blksize, false, impl); 467 CeedChk(ierr); 468 469 // Output pointers 470 for (CeedInt i=0; i<numoutputfields; i++) { 471 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 472 CeedChk(ierr); 473 if (emode == CEED_EVAL_NONE) { 474 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 475 CeedChk(ierr); 476 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 477 CEED_USE_POINTER, 478 &impl->edata[i + numinputfields][e*Q*size]); 479 CeedChk(ierr); 480 } 481 } 482 483 // Q function 484 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 485 CeedChk(ierr); 486 487 // Output basis apply 488 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 489 blksize, numinputfields, 490 numoutputfields, op, impl); 491 CeedChk(ierr); 492 } 493 494 // Zero lvecs 495 for (CeedInt i=0; i<numoutputfields; i++) { 496 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 497 if (vec == CEED_VECTOR_ACTIVE) { 498 if (!impl->add) { 499 vec = outvec; 500 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 501 } 502 } else { 503 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 504 } 505 } 506 impl->add = false; 507 508 // Output restriction 509 for (CeedInt i=0; i<numoutputfields; i++) { 510 // Restore evec 511 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 512 &impl->edata[i + numinputfields]); CeedChk(ierr); 513 // Get output vector 514 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 515 // Active 516 if (vec == CEED_VECTOR_ACTIVE) 517 vec = outvec; 518 // Restrict 519 ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr); 520 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE, 521 lmode, impl->evecs[i+impl->numein], vec, 522 request); CeedChk(ierr); 523 524 } 525 526 // Restore input arrays 527 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 528 opinputfields, false, impl); CeedChk(ierr); 529 530 return 0; 531 } 532 533 // Assemble Linear QFunction 534 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op, 535 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 536 int ierr; 537 CeedOperator_Blocked *impl; 538 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 539 const CeedInt blksize = 8; 540 CeedInt Q, numinputfields, numoutputfields, numelements, size; 541 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 542 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 543 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 544 CeedQFunction qf; 545 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 546 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 547 CeedChk(ierr); 548 CeedOperatorField *opinputfields, *opoutputfields; 549 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 550 CeedChk(ierr); 551 CeedQFunctionField *qfinputfields, *qfoutputfields; 552 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 553 CeedChk(ierr); 554 CeedVector vec, lvec; 555 CeedInt numactivein = 0, numactiveout = 0; 556 CeedVector *activein = NULL; 557 CeedScalar *a, *tmp; 558 Ceed ceed; 559 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 560 561 // Setup 562 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 563 564 // Input Evecs and Restriction 565 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 566 opinputfields, NULL, true, impl, 567 request); CeedChk(ierr); 568 569 // Count number of active input fields 570 for (CeedInt i=0; i<numinputfields; i++) { 571 // Get input vector 572 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 573 // Check if active input 574 if (vec == CEED_VECTOR_ACTIVE) { 575 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 576 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 577 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 578 CeedChk(ierr); 579 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 580 for (CeedInt field=0; field<size; field++) { 581 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 582 CeedChk(ierr); 583 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 584 CEED_USE_POINTER, &tmp[field*Q*blksize]); 585 CeedChk(ierr); 586 } 587 numactivein += size; 588 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 589 } 590 } 591 592 // Count number of active output fields 593 for (CeedInt i=0; i<numoutputfields; i++) { 594 // Get output vector 595 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 596 // Check if active output 597 if (vec == CEED_VECTOR_ACTIVE) { 598 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 599 numactiveout += size; 600 } 601 } 602 603 // Check sizes 604 if (!numactivein || !numactiveout) 605 // LCOV_EXCL_START 606 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 607 "and outputs"); 608 // LCOV_EXCL_STOP 609 610 // Setup lvec 611 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 612 &lvec); CeedChk(ierr); 613 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 614 615 // Create output restriction 616 ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q, 617 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 618 // Create assembled vector 619 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 620 assembled); CeedChk(ierr); 621 622 // Loop through elements 623 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 624 // Input basis apply 625 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 626 numinputfields, blksize, true, impl); 627 CeedChk(ierr); 628 629 // Assemble QFunction 630 for (CeedInt in=0; in<numactivein; in++) { 631 // Set Inputs 632 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 633 if (numactivein > 1) { 634 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 635 0.0); CeedChk(ierr); 636 } 637 // Set Outputs 638 for (CeedInt out=0; out<numoutputfields; out++) { 639 // Get output vector 640 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 641 CeedChk(ierr); 642 // Check if active output 643 if (vec == CEED_VECTOR_ACTIVE) { 644 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 645 CEED_USE_POINTER, a); CeedChk(ierr); 646 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 647 CeedChk(ierr); 648 a += size*Q*blksize; // Advance the pointer by the size of the output 649 } 650 } 651 // Apply QFunction 652 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 653 CeedChk(ierr); 654 } 655 } 656 657 // Un-set output Qvecs to prevent accidental overwrite of Assembled 658 for (CeedInt out=0; out<numoutputfields; out++) { 659 // Get output vector 660 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 661 CeedChk(ierr); 662 // Check if active output 663 if (vec == CEED_VECTOR_ACTIVE) { 664 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 665 NULL); CeedChk(ierr); 666 } 667 } 668 669 // Restore input arrays 670 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 671 opinputfields, true, impl); CeedChk(ierr); 672 673 // Output blocked restriction 674 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 675 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 676 CeedElemRestriction blkrstr; 677 ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize, 678 numelements*Q, 679 numactivein*numactiveout, 680 CEED_MEM_HOST, CEED_COPY_VALUES, 681 NULL, &blkrstr); CeedChk(ierr); 682 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE, 683 lvec, *assembled, request); CeedChk(ierr); 684 685 // Cleanup 686 for (CeedInt i=0; i<numactivein; i++) { 687 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 688 } 689 ierr = CeedFree(&activein); CeedChk(ierr); 690 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 691 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 692 693 return 0; 694 } 695 696 int CeedOperatorCreate_Blocked(CeedOperator op) { 697 int ierr; 698 Ceed ceed; 699 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 700 CeedOperator_Blocked *impl; 701 702 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 703 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 704 705 706 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 707 CeedOperatorAssembleLinearQFunction_Blocked); 708 CeedChk(ierr); 709 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 710 CeedOperatorApply_Blocked); CeedChk(ierr); 711 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 712 CeedOperatorDestroy_Blocked); CeedChk(ierr); 713 return 0; 714 } 715