1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-blocked.h" 18 #include "../ref/ceed-ref.h" 19 20 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 21 int ierr; 22 CeedOperator_Blocked *impl; 23 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 24 25 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 26 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 27 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 28 } 29 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 30 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 31 ierr = CeedFree(&impl->edata); CeedChk(ierr); 32 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 33 34 for (CeedInt i=0; i<impl->numein; i++) { 35 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 36 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 37 } 38 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 39 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 40 41 for (CeedInt i=0; i<impl->numeout; i++) { 42 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 43 if (!impl->identityqf) { 44 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 45 } 46 } 47 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 48 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 49 50 ierr = CeedFree(&impl); CeedChk(ierr); 51 return 0; 52 } 53 54 /* 55 Setup infields or outfields 56 */ 57 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 58 CeedOperator op, bool inOrOut, 59 CeedElemRestriction *blkrestr, 60 CeedVector *fullevecs, CeedVector *evecs, 61 CeedVector *qvecs, CeedInt starte, 62 CeedInt numfields, CeedInt Q) { 63 CeedInt dim, ierr, ncomp, size, P; 64 Ceed ceed; 65 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 66 CeedBasis basis; 67 CeedElemRestriction r; 68 CeedOperatorField *opfields; 69 CeedQFunctionField *qffields; 70 if (inOrOut) { 71 ierr = CeedOperatorGetFields(op, NULL, &opfields); 72 CeedChk(ierr); 73 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 74 CeedChk(ierr); 75 } else { 76 ierr = CeedOperatorGetFields(op, &opfields, NULL); 77 CeedChk(ierr); 78 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 79 CeedChk(ierr); 80 } 81 const CeedInt blksize = 8; 82 83 // Loop over fields 84 for (CeedInt i=0; i<numfields; i++) { 85 CeedEvalMode emode; 86 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 87 88 if (emode != CEED_EVAL_WEIGHT) { 89 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 90 CeedChk(ierr); 91 CeedElemRestriction_Ref *data; 92 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 93 Ceed ceed; 94 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 95 CeedInt nelem, elemsize, nnodes; 96 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 97 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 98 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 99 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 100 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 101 blksize, nnodes, ncomp, 102 CEED_MEM_HOST, CEED_COPY_VALUES, 103 data->indices, &blkrestr[i+starte]); 104 CeedChk(ierr); 105 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 106 &fullevecs[i+starte]); 107 CeedChk(ierr); 108 } 109 110 switch(emode) { 111 case CEED_EVAL_NONE: 112 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 113 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 114 break; 115 case CEED_EVAL_INTERP: 116 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 117 ierr = CeedElemRestrictionGetElementSize(r, &P); 118 CeedChk(ierr); 119 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 120 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 121 break; 122 case CEED_EVAL_GRAD: 123 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 124 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 125 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 126 ierr = CeedElemRestrictionGetElementSize(r, &P); 127 CeedChk(ierr); 128 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 129 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 130 break; 131 case CEED_EVAL_WEIGHT: // Only on input fields 132 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 133 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 134 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 135 CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr); 136 137 break; 138 case CEED_EVAL_DIV: 139 break; // Not implemented 140 case CEED_EVAL_CURL: 141 break; // Not implemented 142 } 143 } 144 return 0; 145 } 146 147 /* 148 CeedOperator needs to connect all the named fields (be they active or passive) 149 to the named inputs and outputs of its CeedQFunction. 150 */ 151 static int CeedOperatorSetup_Blocked(CeedOperator op) { 152 int ierr; 153 bool setupdone; 154 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 155 if (setupdone) return 0; 156 Ceed ceed; 157 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 158 CeedOperator_Blocked *impl; 159 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 160 CeedQFunction qf; 161 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 162 CeedInt Q, numinputfields, numoutputfields; 163 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 164 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 165 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 166 CeedChk(ierr); 167 CeedOperatorField *opinputfields, *opoutputfields; 168 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 169 CeedChk(ierr); 170 CeedQFunctionField *qfinputfields, *qfoutputfields; 171 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 172 CeedChk(ierr); 173 174 // Allocate 175 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 176 CeedChk(ierr); 177 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 178 CeedChk(ierr); 179 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 180 CeedChk(ierr); 181 182 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 183 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 184 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 185 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 186 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 187 188 impl->numein = numinputfields; impl->numeout = numoutputfields; 189 190 // Set up infield and outfield pointer arrays 191 // Infields 192 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 193 impl->evecs, impl->evecsin, 194 impl->qvecsin, 0, 195 numinputfields, Q); 196 CeedChk(ierr); 197 // Outfields 198 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 199 impl->evecs, impl->evecsout, 200 impl->qvecsout, numinputfields, 201 numoutputfields, Q); 202 CeedChk(ierr); 203 204 // Identity QFunctions 205 if (impl->identityqf) { 206 CeedEvalMode inmode, outmode; 207 CeedQFunctionField *infields, *outfields; 208 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 209 210 for (CeedInt i=0; i<numinputfields; i++) { 211 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 212 CeedChk(ierr); 213 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 214 CeedChk(ierr); 215 216 if (inmode == CEED_EVAL_NONE && outmode == CEED_EVAL_NONE) 217 // LCOV_EXCL_START 218 return CeedError(ceed, 1, "CEED_EVAL_NONE for a matching input and " 219 "output does not make sense with identity QFunction"); 220 // LCOV_EXCL_STOP 221 222 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 223 impl->qvecsout[i] = impl->qvecsin[i]; 224 } 225 } 226 227 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 228 229 return 0; 230 } 231 232 // Setup Input fields 233 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 234 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 235 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 236 CeedRequest *request) { 237 CeedInt ierr; 238 CeedEvalMode emode; 239 CeedVector vec; 240 CeedTransposeMode lmode; 241 uint64_t state; 242 243 for (CeedInt i=0; i<numinputfields; i++) { 244 // Get input vector 245 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 246 if (vec == CEED_VECTOR_ACTIVE) { 247 if (skipactive) 248 continue; 249 else 250 vec = invec; 251 } 252 253 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 254 CeedChk(ierr); 255 if (emode == CEED_EVAL_WEIGHT) { // Skip 256 } else { 257 // Restrict 258 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 259 if (state != impl->inputstate[i] || vec == invec) { 260 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 261 CeedChk(ierr); 262 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 263 lmode, vec, impl->evecs[i], 264 request); CeedChk(ierr); CeedChk(ierr); 265 impl->inputstate[i] = state; 266 } 267 // Get evec 268 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 269 (const CeedScalar **) &impl->edata[i]); 270 CeedChk(ierr); 271 } 272 } 273 return 0; 274 } 275 276 // Input basis action 277 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 278 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 279 CeedInt numinputfields, CeedInt blksize, bool skipactive, 280 CeedOperator_Blocked *impl) { 281 CeedInt ierr; 282 CeedInt dim, elemsize, size; 283 CeedElemRestriction Erestrict; 284 CeedEvalMode emode; 285 CeedBasis basis; 286 287 for (CeedInt i=0; i<numinputfields; i++) { 288 // Skip active input 289 if (skipactive) { 290 CeedVector vec; 291 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 292 if (vec == CEED_VECTOR_ACTIVE) 293 continue; 294 } 295 296 // Get elemsize, emode, size 297 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 298 CeedChk(ierr); 299 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 300 CeedChk(ierr); 301 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 302 CeedChk(ierr); 303 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 304 // Basis action 305 switch(emode) { 306 case CEED_EVAL_NONE: 307 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 308 CEED_USE_POINTER, 309 &impl->edata[i][e*Q*size]); CeedChk(ierr); 310 break; 311 case CEED_EVAL_INTERP: 312 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 313 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 314 CEED_USE_POINTER, 315 &impl->edata[i][e*elemsize*size]); 316 CeedChk(ierr); 317 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 318 CEED_EVAL_INTERP, impl->evecsin[i], 319 impl->qvecsin[i]); CeedChk(ierr); 320 break; 321 case CEED_EVAL_GRAD: 322 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 323 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 324 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 325 CEED_USE_POINTER, 326 &impl->edata[i][e*elemsize*size/dim]); 327 CeedChk(ierr); 328 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 329 CEED_EVAL_GRAD, impl->evecsin[i], 330 impl->qvecsin[i]); CeedChk(ierr); 331 break; 332 case CEED_EVAL_WEIGHT: 333 break; // No action 334 case CEED_EVAL_DIV: 335 case CEED_EVAL_CURL: { 336 // LCOV_EXCL_START 337 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 338 CeedChk(ierr); 339 Ceed ceed; 340 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 341 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 342 // LCOV_EXCL_STOP 343 break; // Not implemented 344 } 345 } 346 } 347 return 0; 348 } 349 350 // Output basis action 351 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 352 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 353 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 354 CeedOperator op, CeedOperator_Blocked *impl) { 355 CeedInt ierr; 356 CeedInt dim, elemsize, size; 357 CeedElemRestriction Erestrict; 358 CeedEvalMode emode; 359 CeedBasis basis; 360 361 for (CeedInt i=0; i<numoutputfields; i++) { 362 // Get elemsize, emode, size 363 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 364 CeedChk(ierr); 365 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 366 CeedChk(ierr); 367 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 368 CeedChk(ierr); 369 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 370 // Basis action 371 switch(emode) { 372 case CEED_EVAL_NONE: 373 break; // No action 374 case CEED_EVAL_INTERP: 375 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 376 CeedChk(ierr); 377 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 378 CEED_USE_POINTER, 379 &impl->edata[i + numinputfields][e*elemsize*size]); 380 CeedChk(ierr); 381 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 382 CEED_EVAL_INTERP, impl->qvecsout[i], 383 impl->evecsout[i]); CeedChk(ierr); 384 break; 385 case CEED_EVAL_GRAD: 386 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 387 CeedChk(ierr); 388 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 389 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 390 CEED_USE_POINTER, 391 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 392 CeedChk(ierr); 393 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 394 CEED_EVAL_GRAD, impl->qvecsout[i], 395 impl->evecsout[i]); CeedChk(ierr); 396 break; 397 case CEED_EVAL_WEIGHT: { 398 // LCOV_EXCL_START 399 Ceed ceed; 400 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 401 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 402 "evaluation mode"); 403 // LCOV_EXCL_STOP 404 break; // Should not occur 405 } 406 case CEED_EVAL_DIV: 407 case CEED_EVAL_CURL: { 408 // LCOV_EXCL_START 409 Ceed ceed; 410 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 411 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 412 // LCOV_EXCL_STOP 413 break; // Not implemented 414 } 415 } 416 } 417 return 0; 418 } 419 420 // Restore Inputs 421 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 422 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 423 bool skipactive, CeedOperator_Blocked *impl) { 424 CeedInt ierr; 425 CeedEvalMode emode; 426 427 for (CeedInt i=0; i<numinputfields; i++) { 428 // Skip active inputs 429 if (skipactive) { 430 CeedVector vec; 431 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 432 if (vec == CEED_VECTOR_ACTIVE) 433 continue; 434 } 435 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 436 CeedChk(ierr); 437 if (emode == CEED_EVAL_WEIGHT) { // Skip 438 } else { 439 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 440 (const CeedScalar **) &impl->edata[i]); 441 CeedChk(ierr); 442 } 443 } 444 return 0; 445 } 446 447 // Apply Ceed Operator 448 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 449 CeedVector outvec, 450 CeedRequest *request) { 451 int ierr; 452 CeedOperator_Blocked *impl; 453 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 454 const CeedInt blksize = 8; 455 CeedInt Q, numinputfields, numoutputfields, numelements, size; 456 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 457 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 458 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 459 CeedQFunction qf; 460 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 461 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 462 CeedChk(ierr); 463 CeedTransposeMode lmode; 464 CeedOperatorField *opinputfields, *opoutputfields; 465 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 466 CeedChk(ierr); 467 CeedQFunctionField *qfinputfields, *qfoutputfields; 468 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 469 CeedChk(ierr); 470 CeedEvalMode emode; 471 CeedVector vec; 472 473 // Setup 474 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 475 476 // Input Evecs and Restriction 477 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 478 opinputfields, invec, false, impl, 479 request); CeedChk(ierr); 480 481 // Output Evecs 482 for (CeedInt i=0; i<numoutputfields; i++) { 483 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 484 &impl->edata[i + numinputfields]); CeedChk(ierr); 485 } 486 487 // Loop through elements 488 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 489 // Output pointers 490 for (CeedInt i=0; i<numoutputfields; i++) { 491 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 492 CeedChk(ierr); 493 if (emode == CEED_EVAL_NONE) { 494 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 495 CeedChk(ierr); 496 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 497 CEED_USE_POINTER, 498 &impl->edata[i + numinputfields][e*Q*size]); 499 CeedChk(ierr); 500 } 501 } 502 503 // Input basis apply 504 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 505 numinputfields, blksize, false, impl); 506 CeedChk(ierr); 507 508 // Q function 509 if (!impl->identityqf) { 510 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 511 CeedChk(ierr); 512 } 513 514 // Output basis apply 515 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 516 blksize, numinputfields, 517 numoutputfields, op, impl); 518 CeedChk(ierr); 519 } 520 521 // Zero lvecs 522 for (CeedInt i=0; i<numoutputfields; i++) { 523 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 524 if (vec == CEED_VECTOR_ACTIVE) { 525 if (!impl->add) { 526 vec = outvec; 527 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 528 } 529 } else { 530 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 531 } 532 } 533 impl->add = false; 534 535 // Output restriction 536 for (CeedInt i=0; i<numoutputfields; i++) { 537 // Restore evec 538 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 539 &impl->edata[i + numinputfields]); CeedChk(ierr); 540 // Get output vector 541 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 542 // Active 543 if (vec == CEED_VECTOR_ACTIVE) 544 vec = outvec; 545 // Restrict 546 ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr); 547 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE, 548 lmode, impl->evecs[i+impl->numein], vec, 549 request); CeedChk(ierr); 550 551 } 552 553 // Restore input arrays 554 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 555 opinputfields, false, impl); CeedChk(ierr); 556 557 return 0; 558 } 559 560 // Assemble Linear QFunction 561 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op, 562 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 563 int ierr; 564 CeedOperator_Blocked *impl; 565 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 566 const CeedInt blksize = 8; 567 CeedInt Q, numinputfields, numoutputfields, numelements, size; 568 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 569 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 570 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 571 CeedQFunction qf; 572 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 573 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 574 CeedChk(ierr); 575 CeedOperatorField *opinputfields, *opoutputfields; 576 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 577 CeedChk(ierr); 578 CeedQFunctionField *qfinputfields, *qfoutputfields; 579 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 580 CeedChk(ierr); 581 CeedVector vec, lvec; 582 CeedInt numactivein = 0, numactiveout = 0; 583 CeedVector *activein = NULL; 584 CeedScalar *a, *tmp; 585 Ceed ceed; 586 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 587 588 // Setup 589 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 590 591 // Check for identity 592 if (impl->identityqf) 593 // LCOV_EXCL_START 594 return CeedError(ceed, 1, "Assembling identity qfunction does not make sense"); 595 // LCOV_EXCL_STOP 596 597 // Input Evecs and Restriction 598 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 599 opinputfields, NULL, true, impl, 600 request); CeedChk(ierr); 601 602 // Count number of active input fields 603 for (CeedInt i=0; i<numinputfields; i++) { 604 // Get input vector 605 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 606 // Check if active input 607 if (vec == CEED_VECTOR_ACTIVE) { 608 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 609 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 610 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 611 CeedChk(ierr); 612 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 613 for (CeedInt field=0; field<size; field++) { 614 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 615 CeedChk(ierr); 616 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 617 CEED_USE_POINTER, &tmp[field*Q*blksize]); 618 CeedChk(ierr); 619 } 620 numactivein += size; 621 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 622 } 623 } 624 625 // Count number of active output fields 626 for (CeedInt i=0; i<numoutputfields; i++) { 627 // Get output vector 628 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 629 // Check if active output 630 if (vec == CEED_VECTOR_ACTIVE) { 631 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 632 numactiveout += size; 633 } 634 } 635 636 // Check sizes 637 if (!numactivein || !numactiveout) 638 // LCOV_EXCL_START 639 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 640 "and outputs"); 641 // LCOV_EXCL_STOP 642 643 // Setup lvec 644 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 645 &lvec); CeedChk(ierr); 646 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 647 648 // Create output restriction 649 ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q, 650 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 651 // Create assembled vector 652 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 653 assembled); CeedChk(ierr); 654 655 // Loop through elements 656 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 657 // Input basis apply 658 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 659 numinputfields, blksize, true, impl); 660 CeedChk(ierr); 661 662 // Assemble QFunction 663 for (CeedInt in=0; in<numactivein; in++) { 664 // Set Inputs 665 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 666 if (numactivein > 1) { 667 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 668 0.0); CeedChk(ierr); 669 } 670 // Set Outputs 671 for (CeedInt out=0; out<numoutputfields; out++) { 672 // Get output vector 673 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 674 CeedChk(ierr); 675 // Check if active output 676 if (vec == CEED_VECTOR_ACTIVE) { 677 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 678 CEED_USE_POINTER, a); CeedChk(ierr); 679 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 680 CeedChk(ierr); 681 a += size*Q*blksize; // Advance the pointer by the size of the output 682 } 683 } 684 // Apply QFunction 685 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 686 CeedChk(ierr); 687 } 688 } 689 690 // Un-set output Qvecs to prevent accidental overwrite of Assembled 691 for (CeedInt out=0; out<numoutputfields; out++) { 692 // Get output vector 693 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 694 CeedChk(ierr); 695 // Check if active output 696 if (vec == CEED_VECTOR_ACTIVE) { 697 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 698 NULL); CeedChk(ierr); 699 } 700 } 701 702 // Restore input arrays 703 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 704 opinputfields, true, impl); CeedChk(ierr); 705 706 // Output blocked restriction 707 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 708 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 709 CeedElemRestriction blkrstr; 710 ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize, 711 numelements*Q, 712 numactivein*numactiveout, 713 CEED_MEM_HOST, CEED_COPY_VALUES, 714 NULL, &blkrstr); CeedChk(ierr); 715 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE, 716 lvec, *assembled, request); CeedChk(ierr); 717 718 // Cleanup 719 for (CeedInt i=0; i<numactivein; i++) { 720 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 721 } 722 ierr = CeedFree(&activein); CeedChk(ierr); 723 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 724 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 725 726 return 0; 727 } 728 729 int CeedOperatorCreate_Blocked(CeedOperator op) { 730 int ierr; 731 Ceed ceed; 732 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 733 CeedOperator_Blocked *impl; 734 735 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 736 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 737 738 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 739 CeedOperatorAssembleLinearQFunction_Blocked); 740 CeedChk(ierr); 741 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 742 CeedOperatorApply_Blocked); CeedChk(ierr); 743 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 744 CeedOperatorDestroy_Blocked); CeedChk(ierr); 745 return 0; 746 } 747