1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-blocked.h" 18 #include "../ref/ceed-ref.h" 19 20 //------------------------------------------------------------------------------ 21 // Setup Input/Output Fields 22 //------------------------------------------------------------------------------ 23 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 24 CeedOperator op, bool inOrOut, 25 CeedElemRestriction *blkrestr, 26 CeedVector *fullevecs, CeedVector *evecs, 27 CeedVector *qvecs, CeedInt starte, 28 CeedInt numfields, CeedInt Q) { 29 CeedInt dim, ierr, ncomp, size, P; 30 Ceed ceed; 31 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 32 CeedBasis basis; 33 CeedElemRestriction r; 34 CeedOperatorField *opfields; 35 CeedQFunctionField *qffields; 36 if (inOrOut) { 37 ierr = CeedOperatorGetFields(op, NULL, &opfields); 38 CeedChk(ierr); 39 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 40 CeedChk(ierr); 41 } else { 42 ierr = CeedOperatorGetFields(op, &opfields, NULL); 43 CeedChk(ierr); 44 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 45 CeedChk(ierr); 46 } 47 const CeedInt blksize = 8; 48 49 // Loop over fields 50 for (CeedInt i=0; i<numfields; i++) { 51 CeedEvalMode emode; 52 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 53 54 if (emode != CEED_EVAL_WEIGHT) { 55 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 56 CeedChk(ierr); 57 CeedElemRestriction_Ref *data; 58 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 59 Ceed ceed; 60 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 61 CeedInt nelem, elemsize, nnodes; 62 CeedTransposeMode lmode; 63 ierr = CeedElemRestrictionGetLMode(r, &lmode); CeedChk(ierr); 64 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 65 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 66 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 67 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 68 ierr = CeedElemRestrictionCreateBlocked(ceed, lmode, nelem, elemsize, 69 blksize, nnodes, ncomp, 70 CEED_MEM_HOST, CEED_COPY_VALUES, 71 data->indices, &blkrestr[i+starte]); 72 CeedChk(ierr); 73 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 74 &fullevecs[i+starte]); 75 CeedChk(ierr); 76 } 77 78 switch(emode) { 79 case CEED_EVAL_NONE: 80 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 81 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 82 break; 83 case CEED_EVAL_INTERP: 84 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 85 ierr = CeedElemRestrictionGetElementSize(r, &P); 86 CeedChk(ierr); 87 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 88 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 89 break; 90 case CEED_EVAL_GRAD: 91 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 92 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 93 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 94 ierr = CeedElemRestrictionGetElementSize(r, &P); 95 CeedChk(ierr); 96 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 97 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 98 break; 99 case CEED_EVAL_WEIGHT: // Only on input fields 100 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 101 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 102 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 103 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 104 CeedChk(ierr); 105 106 break; 107 case CEED_EVAL_DIV: 108 break; // Not implemented 109 case CEED_EVAL_CURL: 110 break; // Not implemented 111 } 112 } 113 return 0; 114 } 115 116 //------------------------------------------------------------------------------ 117 // Setup Operator 118 //------------------------------------------------------------------------------ 119 static int CeedOperatorSetup_Blocked(CeedOperator op) { 120 int ierr; 121 bool setupdone; 122 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 123 if (setupdone) return 0; 124 Ceed ceed; 125 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 126 CeedOperator_Blocked *impl; 127 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 128 CeedQFunction qf; 129 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 130 CeedInt Q, numinputfields, numoutputfields; 131 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 132 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 133 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 134 CeedChk(ierr); 135 CeedOperatorField *opinputfields, *opoutputfields; 136 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 137 CeedChk(ierr); 138 CeedQFunctionField *qfinputfields, *qfoutputfields; 139 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 140 CeedChk(ierr); 141 142 // Allocate 143 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 144 CeedChk(ierr); 145 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 146 CeedChk(ierr); 147 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 148 CeedChk(ierr); 149 150 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 151 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 152 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 153 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 154 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 155 156 impl->numein = numinputfields; impl->numeout = numoutputfields; 157 158 // Set up infield and outfield pointer arrays 159 // Infields 160 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 161 impl->evecs, impl->evecsin, 162 impl->qvecsin, 0, 163 numinputfields, Q); 164 CeedChk(ierr); 165 // Outfields 166 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 167 impl->evecs, impl->evecsout, 168 impl->qvecsout, numinputfields, 169 numoutputfields, Q); 170 CeedChk(ierr); 171 172 // Identity QFunctions 173 if (impl->identityqf) { 174 CeedEvalMode inmode, outmode; 175 CeedQFunctionField *infields, *outfields; 176 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 177 178 for (CeedInt i=0; i<numinputfields; i++) { 179 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 180 CeedChk(ierr); 181 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 182 CeedChk(ierr); 183 184 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 185 impl->qvecsout[i] = impl->qvecsin[i]; 186 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 187 } 188 } 189 190 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 191 192 return 0; 193 } 194 195 //------------------------------------------------------------------------------ 196 // Setup Operator Inputs 197 //------------------------------------------------------------------------------ 198 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 199 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 200 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 201 CeedRequest *request) { 202 CeedInt ierr; 203 CeedEvalMode emode; 204 CeedVector vec; 205 uint64_t state; 206 207 for (CeedInt i=0; i<numinputfields; i++) { 208 // Get input vector 209 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 210 if (vec == CEED_VECTOR_ACTIVE) { 211 if (skipactive) 212 continue; 213 else 214 vec = invec; 215 } 216 217 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 218 CeedChk(ierr); 219 if (emode == CEED_EVAL_WEIGHT) { // Skip 220 } else { 221 // Restrict 222 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 223 if (state != impl->inputstate[i] || vec == invec) { 224 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 225 vec, impl->evecs[i], request); 226 CeedChk(ierr); 227 impl->inputstate[i] = state; 228 } 229 // Get evec 230 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 231 (const CeedScalar **) &impl->edata[i]); 232 CeedChk(ierr); 233 } 234 } 235 return 0; 236 } 237 238 //------------------------------------------------------------------------------ 239 // Input Basis Action 240 //------------------------------------------------------------------------------ 241 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 242 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 243 CeedInt numinputfields, CeedInt blksize, bool skipactive, 244 CeedOperator_Blocked *impl) { 245 CeedInt ierr; 246 CeedInt dim, elemsize, size; 247 CeedElemRestriction Erestrict; 248 CeedEvalMode emode; 249 CeedBasis basis; 250 251 for (CeedInt i=0; i<numinputfields; i++) { 252 // Skip active input 253 if (skipactive) { 254 CeedVector vec; 255 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 256 if (vec == CEED_VECTOR_ACTIVE) 257 continue; 258 } 259 260 // Get elemsize, emode, size 261 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 262 CeedChk(ierr); 263 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 264 CeedChk(ierr); 265 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 266 CeedChk(ierr); 267 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 268 // Basis action 269 switch(emode) { 270 case CEED_EVAL_NONE: 271 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 272 CEED_USE_POINTER, 273 &impl->edata[i][e*Q*size]); CeedChk(ierr); 274 break; 275 case CEED_EVAL_INTERP: 276 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 277 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 278 CEED_USE_POINTER, 279 &impl->edata[i][e*elemsize*size]); 280 CeedChk(ierr); 281 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 282 CEED_EVAL_INTERP, impl->evecsin[i], 283 impl->qvecsin[i]); CeedChk(ierr); 284 break; 285 case CEED_EVAL_GRAD: 286 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 287 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 288 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 289 CEED_USE_POINTER, 290 &impl->edata[i][e*elemsize*size/dim]); 291 CeedChk(ierr); 292 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 293 CEED_EVAL_GRAD, impl->evecsin[i], 294 impl->qvecsin[i]); CeedChk(ierr); 295 break; 296 case CEED_EVAL_WEIGHT: 297 break; // No action 298 case CEED_EVAL_DIV: 299 case CEED_EVAL_CURL: { 300 // LCOV_EXCL_START 301 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 302 CeedChk(ierr); 303 Ceed ceed; 304 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 305 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 306 // LCOV_EXCL_STOP 307 break; // Not implemented 308 } 309 } 310 } 311 return 0; 312 } 313 314 //------------------------------------------------------------------------------ 315 // Output Basis Action 316 //------------------------------------------------------------------------------ 317 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 318 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 319 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 320 CeedOperator op, CeedOperator_Blocked *impl) { 321 CeedInt ierr; 322 CeedInt dim, elemsize, size; 323 CeedElemRestriction Erestrict; 324 CeedEvalMode emode; 325 CeedBasis basis; 326 327 for (CeedInt i=0; i<numoutputfields; i++) { 328 // Get elemsize, emode, size 329 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 330 CeedChk(ierr); 331 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 332 CeedChk(ierr); 333 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 334 CeedChk(ierr); 335 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 336 // Basis action 337 switch(emode) { 338 case CEED_EVAL_NONE: 339 break; // No action 340 case CEED_EVAL_INTERP: 341 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 342 CeedChk(ierr); 343 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 344 CEED_USE_POINTER, 345 &impl->edata[i + numinputfields][e*elemsize*size]); 346 CeedChk(ierr); 347 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 348 CEED_EVAL_INTERP, impl->qvecsout[i], 349 impl->evecsout[i]); CeedChk(ierr); 350 break; 351 case CEED_EVAL_GRAD: 352 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 353 CeedChk(ierr); 354 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 355 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 356 CEED_USE_POINTER, 357 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 358 CeedChk(ierr); 359 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 360 CEED_EVAL_GRAD, impl->qvecsout[i], 361 impl->evecsout[i]); CeedChk(ierr); 362 break; 363 case CEED_EVAL_WEIGHT: { 364 // LCOV_EXCL_START 365 Ceed ceed; 366 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 367 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 368 "evaluation mode"); 369 // LCOV_EXCL_STOP 370 break; // Should not occur 371 } 372 case CEED_EVAL_DIV: 373 case CEED_EVAL_CURL: { 374 // LCOV_EXCL_START 375 Ceed ceed; 376 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 377 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 378 // LCOV_EXCL_STOP 379 break; // Not implemented 380 } 381 } 382 } 383 return 0; 384 } 385 386 //------------------------------------------------------------------------------ 387 // Restore Input Vectors 388 //------------------------------------------------------------------------------ 389 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 390 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 391 bool skipactive, CeedOperator_Blocked *impl) { 392 CeedInt ierr; 393 CeedEvalMode emode; 394 395 for (CeedInt i=0; i<numinputfields; i++) { 396 // Skip active inputs 397 if (skipactive) { 398 CeedVector vec; 399 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 400 if (vec == CEED_VECTOR_ACTIVE) 401 continue; 402 } 403 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 404 CeedChk(ierr); 405 if (emode == CEED_EVAL_WEIGHT) { // Skip 406 } else { 407 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 408 (const CeedScalar **) &impl->edata[i]); 409 CeedChk(ierr); 410 } 411 } 412 return 0; 413 } 414 415 //------------------------------------------------------------------------------ 416 // Operator Apply 417 //------------------------------------------------------------------------------ 418 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 419 CeedVector outvec, 420 CeedRequest *request) { 421 int ierr; 422 CeedOperator_Blocked *impl; 423 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 424 const CeedInt blksize = 8; 425 CeedInt Q, numinputfields, numoutputfields, numelements, size; 426 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 427 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 428 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 429 CeedQFunction qf; 430 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 431 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 432 CeedChk(ierr); 433 CeedOperatorField *opinputfields, *opoutputfields; 434 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 435 CeedChk(ierr); 436 CeedQFunctionField *qfinputfields, *qfoutputfields; 437 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 438 CeedChk(ierr); 439 CeedEvalMode emode; 440 CeedVector vec; 441 442 // Setup 443 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 444 445 // Input Evecs and Restriction 446 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 447 opinputfields, invec, false, impl, 448 request); CeedChk(ierr); 449 450 // Output Evecs 451 for (CeedInt i=0; i<numoutputfields; i++) { 452 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 453 &impl->edata[i + numinputfields]); CeedChk(ierr); 454 } 455 456 // Loop through elements 457 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 458 // Output pointers 459 for (CeedInt i=0; i<numoutputfields; i++) { 460 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 461 CeedChk(ierr); 462 if (emode == CEED_EVAL_NONE) { 463 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 464 CeedChk(ierr); 465 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 466 CEED_USE_POINTER, 467 &impl->edata[i + numinputfields][e*Q*size]); 468 CeedChk(ierr); 469 } 470 } 471 472 // Input basis apply 473 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 474 numinputfields, blksize, false, impl); 475 CeedChk(ierr); 476 477 // Q function 478 if (!impl->identityqf) { 479 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 480 CeedChk(ierr); 481 } 482 483 // Output basis apply 484 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 485 blksize, numinputfields, 486 numoutputfields, op, impl); 487 CeedChk(ierr); 488 } 489 490 // Output restriction 491 for (CeedInt i=0; i<numoutputfields; i++) { 492 // Restore evec 493 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 494 &impl->edata[i + numinputfields]); CeedChk(ierr); 495 // Get output vector 496 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 497 // Active 498 if (vec == CEED_VECTOR_ACTIVE) 499 vec = outvec; 500 // Restrict 501 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], 502 CEED_TRANSPOSE, impl->evecs[i+impl->numein], 503 vec, request); CeedChk(ierr); 504 505 } 506 507 // Restore input arrays 508 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 509 opinputfields, false, impl); CeedChk(ierr); 510 511 return 0; 512 } 513 514 //------------------------------------------------------------------------------ 515 // Assemble Linear QFunction 516 //------------------------------------------------------------------------------ 517 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op, 518 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 519 int ierr; 520 CeedOperator_Blocked *impl; 521 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 522 const CeedInt blksize = 8; 523 CeedInt Q, numinputfields, numoutputfields, numelements, size; 524 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 525 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 526 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 527 CeedQFunction qf; 528 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 529 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 530 CeedChk(ierr); 531 CeedOperatorField *opinputfields, *opoutputfields; 532 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 533 CeedChk(ierr); 534 CeedQFunctionField *qfinputfields, *qfoutputfields; 535 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 536 CeedChk(ierr); 537 CeedVector vec, lvec; 538 CeedInt numactivein = 0, numactiveout = 0; 539 CeedVector *activein = NULL; 540 CeedScalar *a, *tmp; 541 Ceed ceed; 542 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 543 544 // Setup 545 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 546 547 // Check for identity 548 if (impl->identityqf) 549 // LCOV_EXCL_START 550 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 551 // LCOV_EXCL_STOP 552 553 // Input Evecs and Restriction 554 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 555 opinputfields, NULL, true, impl, 556 request); CeedChk(ierr); 557 558 // Count number of active input fields 559 for (CeedInt i=0; i<numinputfields; i++) { 560 // Get input vector 561 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 562 // Check if active input 563 if (vec == CEED_VECTOR_ACTIVE) { 564 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 565 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 566 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 567 CeedChk(ierr); 568 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 569 for (CeedInt field=0; field<size; field++) { 570 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 571 CeedChk(ierr); 572 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 573 CEED_USE_POINTER, &tmp[field*Q*blksize]); 574 CeedChk(ierr); 575 } 576 numactivein += size; 577 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 578 } 579 } 580 581 // Count number of active output fields 582 for (CeedInt i=0; i<numoutputfields; i++) { 583 // Get output vector 584 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 585 // Check if active output 586 if (vec == CEED_VECTOR_ACTIVE) { 587 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 588 numactiveout += size; 589 } 590 } 591 592 // Check sizes 593 if (!numactivein || !numactiveout) 594 // LCOV_EXCL_START 595 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 596 "and outputs"); 597 // LCOV_EXCL_STOP 598 599 // Setup lvec 600 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 601 &lvec); CeedChk(ierr); 602 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 603 604 // Create output restriction 605 CeedTransposeMode lmode = CEED_NOTRANSPOSE; 606 ierr = CeedElemRestrictionCreateIdentity(ceed, lmode, numelements, Q, 607 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 608 // Create assembled vector 609 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 610 assembled); CeedChk(ierr); 611 612 // Loop through elements 613 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 614 // Input basis apply 615 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 616 numinputfields, blksize, true, impl); 617 CeedChk(ierr); 618 619 // Assemble QFunction 620 for (CeedInt in=0; in<numactivein; in++) { 621 // Set Inputs 622 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 623 if (numactivein > 1) { 624 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 625 0.0); CeedChk(ierr); 626 } 627 // Set Outputs 628 for (CeedInt out=0; out<numoutputfields; out++) { 629 // Get output vector 630 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 631 CeedChk(ierr); 632 // Check if active output 633 if (vec == CEED_VECTOR_ACTIVE) { 634 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 635 CEED_USE_POINTER, a); CeedChk(ierr); 636 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 637 CeedChk(ierr); 638 a += size*Q*blksize; // Advance the pointer by the size of the output 639 } 640 } 641 // Apply QFunction 642 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 643 CeedChk(ierr); 644 } 645 } 646 647 // Un-set output Qvecs to prevent accidental overwrite of Assembled 648 for (CeedInt out=0; out<numoutputfields; out++) { 649 // Get output vector 650 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 651 CeedChk(ierr); 652 // Check if active output 653 if (vec == CEED_VECTOR_ACTIVE) { 654 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 655 NULL); CeedChk(ierr); 656 } 657 } 658 659 // Restore input arrays 660 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 661 opinputfields, true, impl); CeedChk(ierr); 662 663 // Output blocked restriction 664 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 665 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 666 CeedElemRestriction blkrstr; 667 ierr = CeedElemRestrictionCreateBlocked(ceed, lmode, numelements, Q, blksize, 668 numelements*Q, 669 numactivein*numactiveout, 670 CEED_MEM_HOST, CEED_COPY_VALUES, 671 NULL, &blkrstr); CeedChk(ierr); 672 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled, 673 request); CeedChk(ierr); 674 675 // Cleanup 676 for (CeedInt i=0; i<numactivein; i++) { 677 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 678 } 679 ierr = CeedFree(&activein); CeedChk(ierr); 680 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 681 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 682 683 return 0; 684 } 685 686 //------------------------------------------------------------------------------ 687 // Operator Destroy 688 //------------------------------------------------------------------------------ 689 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 690 int ierr; 691 CeedOperator_Blocked *impl; 692 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 693 694 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 695 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 696 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 697 } 698 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 699 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 700 ierr = CeedFree(&impl->edata); CeedChk(ierr); 701 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 702 703 for (CeedInt i=0; i<impl->numein; i++) { 704 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 705 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 706 } 707 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 708 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 709 710 for (CeedInt i=0; i<impl->numeout; i++) { 711 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 712 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 713 } 714 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 715 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 716 717 ierr = CeedFree(&impl); CeedChk(ierr); 718 return 0; 719 } 720 721 //------------------------------------------------------------------------------ 722 // Operator Create 723 //------------------------------------------------------------------------------ 724 int CeedOperatorCreate_Blocked(CeedOperator op) { 725 int ierr; 726 Ceed ceed; 727 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 728 CeedOperator_Blocked *impl; 729 730 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 731 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 732 733 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 734 CeedOperatorAssembleLinearQFunction_Blocked); 735 CeedChk(ierr); 736 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 737 CeedOperatorApply_Blocked); CeedChk(ierr); 738 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 739 CeedOperatorDestroy_Blocked); CeedChk(ierr); 740 return 0; 741 } 742 //------------------------------------------------------------------------------ 743