1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-blocked.h" 18 #include "../ref/ceed-ref.h" 19 20 //------------------------------------------------------------------------------ 21 // Setup Input/Output Fields 22 //------------------------------------------------------------------------------ 23 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 24 CeedOperator op, bool inOrOut, 25 CeedElemRestriction *blkrestr, 26 CeedVector *fullevecs, CeedVector *evecs, 27 CeedVector *qvecs, CeedInt starte, 28 CeedInt numfields, CeedInt Q) { 29 CeedInt dim, ierr, ncomp, size, P; 30 Ceed ceed; 31 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 32 CeedBasis basis; 33 CeedElemRestriction r; 34 CeedOperatorField *opfields; 35 CeedQFunctionField *qffields; 36 if (inOrOut) { 37 ierr = CeedOperatorGetFields(op, NULL, &opfields); 38 CeedChk(ierr); 39 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 40 CeedChk(ierr); 41 } else { 42 ierr = CeedOperatorGetFields(op, &opfields, NULL); 43 CeedChk(ierr); 44 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 45 CeedChk(ierr); 46 } 47 const CeedInt blksize = 8; 48 49 // Loop over fields 50 for (CeedInt i=0; i<numfields; i++) { 51 CeedEvalMode emode; 52 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 53 54 if (emode != CEED_EVAL_WEIGHT) { 55 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 56 CeedChk(ierr); 57 CeedElemRestriction_Ref *data; 58 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 59 Ceed ceed; 60 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 61 CeedInt nelem, elemsize, nnodes; 62 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 63 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 64 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 65 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 66 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 67 blksize, nnodes, ncomp, 68 CEED_MEM_HOST, CEED_COPY_VALUES, 69 data->indices, &blkrestr[i+starte]); 70 CeedChk(ierr); 71 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 72 &fullevecs[i+starte]); 73 CeedChk(ierr); 74 } 75 76 switch(emode) { 77 case CEED_EVAL_NONE: 78 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 79 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 80 break; 81 case CEED_EVAL_INTERP: 82 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 83 ierr = CeedElemRestrictionGetElementSize(r, &P); 84 CeedChk(ierr); 85 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 86 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 87 break; 88 case CEED_EVAL_GRAD: 89 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 90 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 91 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 92 ierr = CeedElemRestrictionGetElementSize(r, &P); 93 CeedChk(ierr); 94 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 95 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 96 break; 97 case CEED_EVAL_WEIGHT: // Only on input fields 98 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 99 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 100 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 101 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 102 CeedChk(ierr); 103 104 break; 105 case CEED_EVAL_DIV: 106 break; // Not implemented 107 case CEED_EVAL_CURL: 108 break; // Not implemented 109 } 110 } 111 return 0; 112 } 113 114 //------------------------------------------------------------------------------ 115 // Setup Operator 116 //------------------------------------------------------------------------------ 117 static int CeedOperatorSetup_Blocked(CeedOperator op) { 118 int ierr; 119 bool setupdone; 120 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 121 if (setupdone) return 0; 122 Ceed ceed; 123 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 124 CeedOperator_Blocked *impl; 125 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 126 CeedQFunction qf; 127 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 128 CeedInt Q, numinputfields, numoutputfields; 129 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 130 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 131 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 132 CeedChk(ierr); 133 CeedOperatorField *opinputfields, *opoutputfields; 134 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 135 CeedChk(ierr); 136 CeedQFunctionField *qfinputfields, *qfoutputfields; 137 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 138 CeedChk(ierr); 139 140 // Allocate 141 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 142 CeedChk(ierr); 143 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 144 CeedChk(ierr); 145 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 146 CeedChk(ierr); 147 148 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 149 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 150 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 151 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 152 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 153 154 impl->numein = numinputfields; impl->numeout = numoutputfields; 155 156 // Set up infield and outfield pointer arrays 157 // Infields 158 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 159 impl->evecs, impl->evecsin, 160 impl->qvecsin, 0, 161 numinputfields, Q); 162 CeedChk(ierr); 163 // Outfields 164 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 165 impl->evecs, impl->evecsout, 166 impl->qvecsout, numinputfields, 167 numoutputfields, Q); 168 CeedChk(ierr); 169 170 // Identity QFunctions 171 if (impl->identityqf) { 172 CeedEvalMode inmode, outmode; 173 CeedQFunctionField *infields, *outfields; 174 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 175 176 for (CeedInt i=0; i<numinputfields; i++) { 177 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 178 CeedChk(ierr); 179 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 180 CeedChk(ierr); 181 182 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 183 impl->qvecsout[i] = impl->qvecsin[i]; 184 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 185 } 186 } 187 188 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 189 190 return 0; 191 } 192 193 //------------------------------------------------------------------------------ 194 // Setup Operator Inputs 195 //------------------------------------------------------------------------------ 196 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 197 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 198 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 199 CeedRequest *request) { 200 CeedInt ierr; 201 CeedEvalMode emode; 202 CeedVector vec; 203 CeedTransposeMode lmode; 204 uint64_t state; 205 206 for (CeedInt i=0; i<numinputfields; i++) { 207 // Get input vector 208 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 209 if (vec == CEED_VECTOR_ACTIVE) { 210 if (skipactive) 211 continue; 212 else 213 vec = invec; 214 } 215 216 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 217 CeedChk(ierr); 218 if (emode == CEED_EVAL_WEIGHT) { // Skip 219 } else { 220 // Restrict 221 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 222 if (state != impl->inputstate[i] || vec == invec) { 223 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 224 CeedChk(ierr); 225 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 226 lmode, vec, impl->evecs[i], 227 request); CeedChk(ierr); CeedChk(ierr); 228 impl->inputstate[i] = state; 229 } 230 // Get evec 231 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 232 (const CeedScalar **) &impl->edata[i]); 233 CeedChk(ierr); 234 } 235 } 236 return 0; 237 } 238 239 //------------------------------------------------------------------------------ 240 // Input Basis Action 241 //------------------------------------------------------------------------------ 242 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 243 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 244 CeedInt numinputfields, CeedInt blksize, bool skipactive, 245 CeedOperator_Blocked *impl) { 246 CeedInt ierr; 247 CeedInt dim, elemsize, size; 248 CeedElemRestriction Erestrict; 249 CeedEvalMode emode; 250 CeedBasis basis; 251 252 for (CeedInt i=0; i<numinputfields; i++) { 253 // Skip active input 254 if (skipactive) { 255 CeedVector vec; 256 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 257 if (vec == CEED_VECTOR_ACTIVE) 258 continue; 259 } 260 261 // Get elemsize, emode, size 262 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 263 CeedChk(ierr); 264 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 265 CeedChk(ierr); 266 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 267 CeedChk(ierr); 268 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 269 // Basis action 270 switch(emode) { 271 case CEED_EVAL_NONE: 272 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 273 CEED_USE_POINTER, 274 &impl->edata[i][e*Q*size]); CeedChk(ierr); 275 break; 276 case CEED_EVAL_INTERP: 277 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 278 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 279 CEED_USE_POINTER, 280 &impl->edata[i][e*elemsize*size]); 281 CeedChk(ierr); 282 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 283 CEED_EVAL_INTERP, impl->evecsin[i], 284 impl->qvecsin[i]); CeedChk(ierr); 285 break; 286 case CEED_EVAL_GRAD: 287 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 288 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 289 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 290 CEED_USE_POINTER, 291 &impl->edata[i][e*elemsize*size/dim]); 292 CeedChk(ierr); 293 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 294 CEED_EVAL_GRAD, impl->evecsin[i], 295 impl->qvecsin[i]); CeedChk(ierr); 296 break; 297 case CEED_EVAL_WEIGHT: 298 break; // No action 299 case CEED_EVAL_DIV: 300 case CEED_EVAL_CURL: { 301 // LCOV_EXCL_START 302 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 303 CeedChk(ierr); 304 Ceed ceed; 305 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 306 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 307 // LCOV_EXCL_STOP 308 break; // Not implemented 309 } 310 } 311 } 312 return 0; 313 } 314 315 //------------------------------------------------------------------------------ 316 // Output Basis Action 317 //------------------------------------------------------------------------------ 318 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 319 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 320 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 321 CeedOperator op, CeedOperator_Blocked *impl) { 322 CeedInt ierr; 323 CeedInt dim, elemsize, size; 324 CeedElemRestriction Erestrict; 325 CeedEvalMode emode; 326 CeedBasis basis; 327 328 for (CeedInt i=0; i<numoutputfields; i++) { 329 // Get elemsize, emode, size 330 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 331 CeedChk(ierr); 332 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 333 CeedChk(ierr); 334 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 335 CeedChk(ierr); 336 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 337 // Basis action 338 switch(emode) { 339 case CEED_EVAL_NONE: 340 break; // No action 341 case CEED_EVAL_INTERP: 342 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 343 CeedChk(ierr); 344 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 345 CEED_USE_POINTER, 346 &impl->edata[i + numinputfields][e*elemsize*size]); 347 CeedChk(ierr); 348 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 349 CEED_EVAL_INTERP, impl->qvecsout[i], 350 impl->evecsout[i]); CeedChk(ierr); 351 break; 352 case CEED_EVAL_GRAD: 353 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 354 CeedChk(ierr); 355 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 356 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 357 CEED_USE_POINTER, 358 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 359 CeedChk(ierr); 360 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 361 CEED_EVAL_GRAD, impl->qvecsout[i], 362 impl->evecsout[i]); CeedChk(ierr); 363 break; 364 case CEED_EVAL_WEIGHT: { 365 // LCOV_EXCL_START 366 Ceed ceed; 367 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 368 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 369 "evaluation mode"); 370 // LCOV_EXCL_STOP 371 break; // Should not occur 372 } 373 case CEED_EVAL_DIV: 374 case CEED_EVAL_CURL: { 375 // LCOV_EXCL_START 376 Ceed ceed; 377 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 378 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 379 // LCOV_EXCL_STOP 380 break; // Not implemented 381 } 382 } 383 } 384 return 0; 385 } 386 387 //------------------------------------------------------------------------------ 388 // Restore Input Vectors 389 //------------------------------------------------------------------------------ 390 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 391 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 392 bool skipactive, CeedOperator_Blocked *impl) { 393 CeedInt ierr; 394 CeedEvalMode emode; 395 396 for (CeedInt i=0; i<numinputfields; i++) { 397 // Skip active inputs 398 if (skipactive) { 399 CeedVector vec; 400 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 401 if (vec == CEED_VECTOR_ACTIVE) 402 continue; 403 } 404 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 405 CeedChk(ierr); 406 if (emode == CEED_EVAL_WEIGHT) { // Skip 407 } else { 408 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 409 (const CeedScalar **) &impl->edata[i]); 410 CeedChk(ierr); 411 } 412 } 413 return 0; 414 } 415 416 //------------------------------------------------------------------------------ 417 // Operator Apply 418 //------------------------------------------------------------------------------ 419 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 420 CeedVector outvec, 421 CeedRequest *request) { 422 int ierr; 423 CeedOperator_Blocked *impl; 424 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 425 const CeedInt blksize = 8; 426 CeedInt Q, numinputfields, numoutputfields, numelements, size; 427 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 428 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 429 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 430 CeedQFunction qf; 431 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 432 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 433 CeedChk(ierr); 434 CeedTransposeMode lmode; 435 CeedOperatorField *opinputfields, *opoutputfields; 436 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 437 CeedChk(ierr); 438 CeedQFunctionField *qfinputfields, *qfoutputfields; 439 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 440 CeedChk(ierr); 441 CeedEvalMode emode; 442 CeedVector vec; 443 444 // Setup 445 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 446 447 // Input Evecs and Restriction 448 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 449 opinputfields, invec, false, impl, 450 request); CeedChk(ierr); 451 452 // Output Evecs 453 for (CeedInt i=0; i<numoutputfields; i++) { 454 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 455 &impl->edata[i + numinputfields]); CeedChk(ierr); 456 } 457 458 // Loop through elements 459 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 460 // Output pointers 461 for (CeedInt i=0; i<numoutputfields; i++) { 462 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 463 CeedChk(ierr); 464 if (emode == CEED_EVAL_NONE) { 465 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 466 CeedChk(ierr); 467 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 468 CEED_USE_POINTER, 469 &impl->edata[i + numinputfields][e*Q*size]); 470 CeedChk(ierr); 471 } 472 } 473 474 // Input basis apply 475 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 476 numinputfields, blksize, false, impl); 477 CeedChk(ierr); 478 479 // Q function 480 if (!impl->identityqf) { 481 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 482 CeedChk(ierr); 483 } 484 485 // Output basis apply 486 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 487 blksize, numinputfields, 488 numoutputfields, op, impl); 489 CeedChk(ierr); 490 } 491 492 // Output restriction 493 for (CeedInt i=0; i<numoutputfields; i++) { 494 // Restore evec 495 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 496 &impl->edata[i + numinputfields]); CeedChk(ierr); 497 // Get output vector 498 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 499 // Active 500 if (vec == CEED_VECTOR_ACTIVE) 501 vec = outvec; 502 // Restrict 503 ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr); 504 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE, 505 lmode, impl->evecs[i+impl->numein], vec, 506 request); CeedChk(ierr); 507 508 } 509 510 // Restore input arrays 511 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 512 opinputfields, false, impl); CeedChk(ierr); 513 514 return 0; 515 } 516 517 //------------------------------------------------------------------------------ 518 // Assemble Linear QFunction 519 //------------------------------------------------------------------------------ 520 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op, 521 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 522 int ierr; 523 CeedOperator_Blocked *impl; 524 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 525 const CeedInt blksize = 8; 526 CeedInt Q, numinputfields, numoutputfields, numelements, size; 527 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 528 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 529 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 530 CeedQFunction qf; 531 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 532 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 533 CeedChk(ierr); 534 CeedOperatorField *opinputfields, *opoutputfields; 535 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 536 CeedChk(ierr); 537 CeedQFunctionField *qfinputfields, *qfoutputfields; 538 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 539 CeedChk(ierr); 540 CeedVector vec, lvec; 541 CeedInt numactivein = 0, numactiveout = 0; 542 CeedVector *activein = NULL; 543 CeedScalar *a, *tmp; 544 Ceed ceed; 545 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 546 547 // Setup 548 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 549 550 // Check for identity 551 if (impl->identityqf) 552 // LCOV_EXCL_START 553 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 554 // LCOV_EXCL_STOP 555 556 // Input Evecs and Restriction 557 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 558 opinputfields, NULL, true, impl, 559 request); CeedChk(ierr); 560 561 // Count number of active input fields 562 for (CeedInt i=0; i<numinputfields; i++) { 563 // Get input vector 564 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 565 // Check if active input 566 if (vec == CEED_VECTOR_ACTIVE) { 567 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 568 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 569 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 570 CeedChk(ierr); 571 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 572 for (CeedInt field=0; field<size; field++) { 573 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 574 CeedChk(ierr); 575 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 576 CEED_USE_POINTER, &tmp[field*Q*blksize]); 577 CeedChk(ierr); 578 } 579 numactivein += size; 580 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 581 } 582 } 583 584 // Count number of active output fields 585 for (CeedInt i=0; i<numoutputfields; i++) { 586 // Get output vector 587 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 588 // Check if active output 589 if (vec == CEED_VECTOR_ACTIVE) { 590 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 591 numactiveout += size; 592 } 593 } 594 595 // Check sizes 596 if (!numactivein || !numactiveout) 597 // LCOV_EXCL_START 598 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 599 "and outputs"); 600 // LCOV_EXCL_STOP 601 602 // Setup lvec 603 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 604 &lvec); CeedChk(ierr); 605 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 606 607 // Create output restriction 608 ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q, 609 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 610 // Create assembled vector 611 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 612 assembled); CeedChk(ierr); 613 614 // Loop through elements 615 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 616 // Input basis apply 617 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 618 numinputfields, blksize, true, impl); 619 CeedChk(ierr); 620 621 // Assemble QFunction 622 for (CeedInt in=0; in<numactivein; in++) { 623 // Set Inputs 624 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 625 if (numactivein > 1) { 626 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 627 0.0); CeedChk(ierr); 628 } 629 // Set Outputs 630 for (CeedInt out=0; out<numoutputfields; out++) { 631 // Get output vector 632 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 633 CeedChk(ierr); 634 // Check if active output 635 if (vec == CEED_VECTOR_ACTIVE) { 636 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 637 CEED_USE_POINTER, a); CeedChk(ierr); 638 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 639 CeedChk(ierr); 640 a += size*Q*blksize; // Advance the pointer by the size of the output 641 } 642 } 643 // Apply QFunction 644 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 645 CeedChk(ierr); 646 } 647 } 648 649 // Un-set output Qvecs to prevent accidental overwrite of Assembled 650 for (CeedInt out=0; out<numoutputfields; out++) { 651 // Get output vector 652 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 653 CeedChk(ierr); 654 // Check if active output 655 if (vec == CEED_VECTOR_ACTIVE) { 656 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 657 NULL); CeedChk(ierr); 658 } 659 } 660 661 // Restore input arrays 662 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 663 opinputfields, true, impl); CeedChk(ierr); 664 665 // Output blocked restriction 666 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 667 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 668 CeedElemRestriction blkrstr; 669 ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize, 670 numelements*Q, 671 numactivein*numactiveout, 672 CEED_MEM_HOST, CEED_COPY_VALUES, 673 NULL, &blkrstr); CeedChk(ierr); 674 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE, 675 lvec, *assembled, request); CeedChk(ierr); 676 677 // Cleanup 678 for (CeedInt i=0; i<numactivein; i++) { 679 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 680 } 681 ierr = CeedFree(&activein); CeedChk(ierr); 682 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 683 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 684 685 return 0; 686 } 687 688 //------------------------------------------------------------------------------ 689 // Operator Destroy 690 //------------------------------------------------------------------------------ 691 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 692 int ierr; 693 CeedOperator_Blocked *impl; 694 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 695 696 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 697 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 698 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 699 } 700 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 701 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 702 ierr = CeedFree(&impl->edata); CeedChk(ierr); 703 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 704 705 for (CeedInt i=0; i<impl->numein; i++) { 706 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 707 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 708 } 709 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 710 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 711 712 for (CeedInt i=0; i<impl->numeout; i++) { 713 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 714 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 715 } 716 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 717 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 718 719 ierr = CeedFree(&impl); CeedChk(ierr); 720 return 0; 721 } 722 723 //------------------------------------------------------------------------------ 724 // Operator Create 725 //------------------------------------------------------------------------------ 726 int CeedOperatorCreate_Blocked(CeedOperator op) { 727 int ierr; 728 Ceed ceed; 729 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 730 CeedOperator_Blocked *impl; 731 732 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 733 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 734 735 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 736 CeedOperatorAssembleLinearQFunction_Blocked); 737 CeedChk(ierr); 738 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 739 CeedOperatorApply_Blocked); CeedChk(ierr); 740 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 741 CeedOperatorDestroy_Blocked); CeedChk(ierr); 742 return 0; 743 } 744 //------------------------------------------------------------------------------ 745