1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-blocked.h" 18 19 //------------------------------------------------------------------------------ 20 // Setup Input/Output Fields 21 //------------------------------------------------------------------------------ 22 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 23 CeedOperator op, bool inOrOut, 24 CeedElemRestriction *blkrestr, 25 CeedVector *fullevecs, CeedVector *evecs, 26 CeedVector *qvecs, CeedInt starte, 27 CeedInt numfields, CeedInt Q) { 28 CeedInt dim, ierr, ncomp, size, P; 29 Ceed ceed; 30 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 31 CeedBasis basis; 32 CeedElemRestriction r; 33 CeedOperatorField *opfields; 34 CeedQFunctionField *qffields; 35 if (inOrOut) { 36 ierr = CeedOperatorGetFields(op, NULL, &opfields); 37 CeedChk(ierr); 38 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 39 CeedChk(ierr); 40 } else { 41 ierr = CeedOperatorGetFields(op, &opfields, NULL); 42 CeedChk(ierr); 43 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 44 CeedChk(ierr); 45 } 46 const CeedInt blksize = 8; 47 48 // Loop over fields 49 for (CeedInt i=0; i<numfields; i++) { 50 CeedEvalMode emode; 51 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 52 53 if (emode != CEED_EVAL_WEIGHT) { 54 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 55 CeedChk(ierr); 56 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 57 CeedInt nelem, elemsize, lsize, compstride; 58 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 59 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 60 ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr); 61 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 62 63 bool strided; 64 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChk(ierr); 65 if (strided) { 66 CeedInt strides[3]; 67 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 68 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize, 69 blksize, ncomp, lsize, strides, &blkrestr[i+starte]); 70 CeedChk(ierr); 71 } else { 72 const CeedInt *offsets = NULL; 73 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 74 CeedChk(ierr); 75 ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr); 76 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 77 blksize, ncomp, compstride, 78 lsize, CEED_MEM_HOST, 79 CEED_COPY_VALUES, offsets, 80 &blkrestr[i+starte]); 81 CeedChk(ierr); 82 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChk(ierr); 83 } 84 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 85 &fullevecs[i+starte]); 86 CeedChk(ierr); 87 } 88 89 switch(emode) { 90 case CEED_EVAL_NONE: 91 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 92 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 93 break; 94 case CEED_EVAL_INTERP: 95 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 96 ierr = CeedElemRestrictionGetElementSize(r, &P); 97 CeedChk(ierr); 98 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 99 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 100 break; 101 case CEED_EVAL_GRAD: 102 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 103 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 104 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 105 ierr = CeedElemRestrictionGetElementSize(r, &P); 106 CeedChk(ierr); 107 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 108 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 109 break; 110 case CEED_EVAL_WEIGHT: // Only on input fields 111 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 112 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 113 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 114 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 115 CeedChk(ierr); 116 117 break; 118 case CEED_EVAL_DIV: 119 break; // Not implemented 120 case CEED_EVAL_CURL: 121 break; // Not implemented 122 } 123 } 124 return 0; 125 } 126 127 //------------------------------------------------------------------------------ 128 // Setup Operator 129 //------------------------------------------------------------------------------ 130 static int CeedOperatorSetup_Blocked(CeedOperator op) { 131 int ierr; 132 bool setupdone; 133 ierr = CeedOperatorIsSetupDone(op, &setupdone); CeedChk(ierr); 134 if (setupdone) return 0; 135 Ceed ceed; 136 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 137 CeedOperator_Blocked *impl; 138 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 139 CeedQFunction qf; 140 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 141 CeedInt Q, numinputfields, numoutputfields; 142 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 143 ierr = CeedQFunctionIsIdentity(qf, &impl->identityqf); CeedChk(ierr); 144 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 145 CeedChk(ierr); 146 CeedOperatorField *opinputfields, *opoutputfields; 147 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 148 CeedChk(ierr); 149 CeedQFunctionField *qfinputfields, *qfoutputfields; 150 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 151 CeedChk(ierr); 152 153 // Allocate 154 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 155 CeedChk(ierr); 156 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 157 CeedChk(ierr); 158 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 159 CeedChk(ierr); 160 161 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 162 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 163 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 164 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 165 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 166 167 impl->numein = numinputfields; impl->numeout = numoutputfields; 168 169 // Set up infield and outfield pointer arrays 170 // Infields 171 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 172 impl->evecs, impl->evecsin, 173 impl->qvecsin, 0, 174 numinputfields, Q); 175 CeedChk(ierr); 176 // Outfields 177 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 178 impl->evecs, impl->evecsout, 179 impl->qvecsout, numinputfields, 180 numoutputfields, Q); 181 CeedChk(ierr); 182 183 // Identity QFunctions 184 if (impl->identityqf) { 185 CeedEvalMode inmode, outmode; 186 CeedQFunctionField *infields, *outfields; 187 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 188 189 for (CeedInt i=0; i<numinputfields; i++) { 190 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 191 CeedChk(ierr); 192 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 193 CeedChk(ierr); 194 195 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 196 impl->qvecsout[i] = impl->qvecsin[i]; 197 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 198 } 199 } 200 201 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 202 203 return 0; 204 } 205 206 //------------------------------------------------------------------------------ 207 // Setup Operator Inputs 208 //------------------------------------------------------------------------------ 209 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 210 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 211 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 212 CeedRequest *request) { 213 CeedInt ierr; 214 CeedEvalMode emode; 215 CeedVector vec; 216 uint64_t state; 217 218 for (CeedInt i=0; i<numinputfields; i++) { 219 // Get input vector 220 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 221 if (vec == CEED_VECTOR_ACTIVE) { 222 if (skipactive) 223 continue; 224 else 225 vec = invec; 226 } 227 228 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 229 CeedChk(ierr); 230 if (emode == CEED_EVAL_WEIGHT) { // Skip 231 } else { 232 // Restrict 233 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 234 if (state != impl->inputstate[i] || vec == invec) { 235 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 236 vec, impl->evecs[i], request); 237 CeedChk(ierr); 238 impl->inputstate[i] = state; 239 } 240 // Get evec 241 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 242 (const CeedScalar **) &impl->edata[i]); 243 CeedChk(ierr); 244 } 245 } 246 return 0; 247 } 248 249 //------------------------------------------------------------------------------ 250 // Input Basis Action 251 //------------------------------------------------------------------------------ 252 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 253 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 254 CeedInt numinputfields, CeedInt blksize, bool skipactive, 255 CeedOperator_Blocked *impl) { 256 CeedInt ierr; 257 CeedInt dim, elemsize, size; 258 CeedElemRestriction Erestrict; 259 CeedEvalMode emode; 260 CeedBasis basis; 261 262 for (CeedInt i=0; i<numinputfields; i++) { 263 // Skip active input 264 if (skipactive) { 265 CeedVector vec; 266 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 267 if (vec == CEED_VECTOR_ACTIVE) 268 continue; 269 } 270 271 // Get elemsize, emode, size 272 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 273 CeedChk(ierr); 274 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 275 CeedChk(ierr); 276 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 277 CeedChk(ierr); 278 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 279 // Basis action 280 switch(emode) { 281 case CEED_EVAL_NONE: 282 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 283 CEED_USE_POINTER, 284 &impl->edata[i][e*Q*size]); CeedChk(ierr); 285 break; 286 case CEED_EVAL_INTERP: 287 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 288 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 289 CEED_USE_POINTER, 290 &impl->edata[i][e*elemsize*size]); 291 CeedChk(ierr); 292 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 293 CEED_EVAL_INTERP, impl->evecsin[i], 294 impl->qvecsin[i]); CeedChk(ierr); 295 break; 296 case CEED_EVAL_GRAD: 297 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 298 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 299 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 300 CEED_USE_POINTER, 301 &impl->edata[i][e*elemsize*size/dim]); 302 CeedChk(ierr); 303 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 304 CEED_EVAL_GRAD, impl->evecsin[i], 305 impl->qvecsin[i]); CeedChk(ierr); 306 break; 307 case CEED_EVAL_WEIGHT: 308 break; // No action 309 // LCOV_EXCL_START 310 case CEED_EVAL_DIV: 311 case CEED_EVAL_CURL: { 312 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 313 CeedChk(ierr); 314 Ceed ceed; 315 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 316 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 317 // LCOV_EXCL_STOP 318 } 319 } 320 } 321 return 0; 322 } 323 324 //------------------------------------------------------------------------------ 325 // Output Basis Action 326 //------------------------------------------------------------------------------ 327 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 328 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 329 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 330 CeedOperator op, CeedOperator_Blocked *impl) { 331 CeedInt ierr; 332 CeedInt dim, elemsize, size; 333 CeedElemRestriction Erestrict; 334 CeedEvalMode emode; 335 CeedBasis basis; 336 337 for (CeedInt i=0; i<numoutputfields; i++) { 338 // Get elemsize, emode, size 339 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 340 CeedChk(ierr); 341 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 342 CeedChk(ierr); 343 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 344 CeedChk(ierr); 345 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 346 // Basis action 347 switch(emode) { 348 case CEED_EVAL_NONE: 349 break; // No action 350 case CEED_EVAL_INTERP: 351 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 352 CeedChk(ierr); 353 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 354 CEED_USE_POINTER, 355 &impl->edata[i + numinputfields][e*elemsize*size]); 356 CeedChk(ierr); 357 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 358 CEED_EVAL_INTERP, impl->qvecsout[i], 359 impl->evecsout[i]); CeedChk(ierr); 360 break; 361 case CEED_EVAL_GRAD: 362 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 363 CeedChk(ierr); 364 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 365 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 366 CEED_USE_POINTER, 367 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 368 CeedChk(ierr); 369 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 370 CEED_EVAL_GRAD, impl->qvecsout[i], 371 impl->evecsout[i]); CeedChk(ierr); 372 break; 373 // LCOV_EXCL_START 374 case CEED_EVAL_WEIGHT: { 375 Ceed ceed; 376 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 377 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 378 "evaluation mode"); 379 } 380 case CEED_EVAL_DIV: 381 case CEED_EVAL_CURL: { 382 Ceed ceed; 383 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 384 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 385 // LCOV_EXCL_STOP 386 } 387 } 388 } 389 return 0; 390 } 391 392 //------------------------------------------------------------------------------ 393 // Restore Input Vectors 394 //------------------------------------------------------------------------------ 395 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 396 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 397 bool skipactive, CeedOperator_Blocked *impl) { 398 CeedInt ierr; 399 CeedEvalMode emode; 400 401 for (CeedInt i=0; i<numinputfields; i++) { 402 // Skip active inputs 403 if (skipactive) { 404 CeedVector vec; 405 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 406 if (vec == CEED_VECTOR_ACTIVE) 407 continue; 408 } 409 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 410 CeedChk(ierr); 411 if (emode == CEED_EVAL_WEIGHT) { // Skip 412 } else { 413 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 414 (const CeedScalar **) &impl->edata[i]); 415 CeedChk(ierr); 416 } 417 } 418 return 0; 419 } 420 421 //------------------------------------------------------------------------------ 422 // Operator Apply 423 //------------------------------------------------------------------------------ 424 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 425 CeedVector outvec, 426 CeedRequest *request) { 427 int ierr; 428 CeedOperator_Blocked *impl; 429 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 430 const CeedInt blksize = 8; 431 CeedInt Q, numinputfields, numoutputfields, numelements, size; 432 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 433 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 434 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 435 CeedQFunction qf; 436 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 437 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 438 CeedChk(ierr); 439 CeedOperatorField *opinputfields, *opoutputfields; 440 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 441 CeedChk(ierr); 442 CeedQFunctionField *qfinputfields, *qfoutputfields; 443 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 444 CeedChk(ierr); 445 CeedEvalMode emode; 446 CeedVector vec; 447 448 // Setup 449 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 450 451 // Input Evecs and Restriction 452 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 453 opinputfields, invec, false, impl, 454 request); CeedChk(ierr); 455 456 // Output Evecs 457 for (CeedInt i=0; i<numoutputfields; i++) { 458 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 459 &impl->edata[i + numinputfields]); CeedChk(ierr); 460 } 461 462 // Loop through elements 463 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 464 // Output pointers 465 for (CeedInt i=0; i<numoutputfields; i++) { 466 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 467 CeedChk(ierr); 468 if (emode == CEED_EVAL_NONE) { 469 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 470 CeedChk(ierr); 471 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 472 CEED_USE_POINTER, 473 &impl->edata[i + numinputfields][e*Q*size]); 474 CeedChk(ierr); 475 } 476 } 477 478 // Input basis apply 479 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 480 numinputfields, blksize, false, impl); 481 CeedChk(ierr); 482 483 // Q function 484 if (!impl->identityqf) { 485 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 486 CeedChk(ierr); 487 } 488 489 // Output basis apply 490 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 491 blksize, numinputfields, 492 numoutputfields, op, impl); 493 CeedChk(ierr); 494 } 495 496 // Output restriction 497 for (CeedInt i=0; i<numoutputfields; i++) { 498 // Restore evec 499 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 500 &impl->edata[i + numinputfields]); CeedChk(ierr); 501 // Get output vector 502 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 503 // Active 504 if (vec == CEED_VECTOR_ACTIVE) 505 vec = outvec; 506 // Restrict 507 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], 508 CEED_TRANSPOSE, impl->evecs[i+impl->numein], 509 vec, request); CeedChk(ierr); 510 511 } 512 513 // Restore input arrays 514 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 515 opinputfields, false, impl); CeedChk(ierr); 516 517 return 0; 518 } 519 520 //------------------------------------------------------------------------------ 521 // Assemble Linear QFunction 522 //------------------------------------------------------------------------------ 523 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op, 524 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 525 int ierr; 526 CeedOperator_Blocked *impl; 527 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 528 const CeedInt blksize = 8; 529 CeedInt Q, numinputfields, numoutputfields, numelements, size; 530 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 531 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 532 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 533 CeedQFunction qf; 534 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 535 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 536 CeedChk(ierr); 537 CeedOperatorField *opinputfields, *opoutputfields; 538 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 539 CeedChk(ierr); 540 CeedQFunctionField *qfinputfields, *qfoutputfields; 541 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 542 CeedChk(ierr); 543 CeedVector vec, lvec; 544 CeedInt numactivein = 0, numactiveout = 0; 545 CeedVector *activein = NULL; 546 CeedScalar *a, *tmp; 547 Ceed ceed; 548 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 549 550 // Setup 551 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 552 553 // Check for identity 554 if (impl->identityqf) 555 // LCOV_EXCL_START 556 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 557 // LCOV_EXCL_STOP 558 559 // Input Evecs and Restriction 560 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 561 opinputfields, NULL, true, impl, 562 request); CeedChk(ierr); 563 564 // Count number of active input fields 565 for (CeedInt i=0; i<numinputfields; i++) { 566 // Get input vector 567 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 568 // Check if active input 569 if (vec == CEED_VECTOR_ACTIVE) { 570 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 571 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 572 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 573 CeedChk(ierr); 574 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 575 for (CeedInt field=0; field<size; field++) { 576 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 577 CeedChk(ierr); 578 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 579 CEED_USE_POINTER, &tmp[field*Q*blksize]); 580 CeedChk(ierr); 581 } 582 numactivein += size; 583 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 584 } 585 } 586 587 // Count number of active output fields 588 for (CeedInt i=0; i<numoutputfields; i++) { 589 // Get output vector 590 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 591 // Check if active output 592 if (vec == CEED_VECTOR_ACTIVE) { 593 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 594 numactiveout += size; 595 } 596 } 597 598 // Check sizes 599 if (!numactivein || !numactiveout) 600 // LCOV_EXCL_START 601 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 602 "and outputs"); 603 // LCOV_EXCL_STOP 604 605 // Setup lvec 606 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 607 &lvec); CeedChk(ierr); 608 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 609 610 // Create output restriction 611 CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q}; 612 ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q, 613 numactivein*numactiveout, 614 numactivein*numactiveout*numelements*Q, 615 strides, rstr); CeedChk(ierr); 616 // Create assembled vector 617 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 618 assembled); CeedChk(ierr); 619 620 // Loop through elements 621 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 622 // Input basis apply 623 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 624 numinputfields, blksize, true, impl); 625 CeedChk(ierr); 626 627 // Assemble QFunction 628 for (CeedInt in=0; in<numactivein; in++) { 629 // Set Inputs 630 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 631 if (numactivein > 1) { 632 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 633 0.0); CeedChk(ierr); 634 } 635 // Set Outputs 636 for (CeedInt out=0; out<numoutputfields; out++) { 637 // Get output vector 638 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 639 CeedChk(ierr); 640 // Check if active output 641 if (vec == CEED_VECTOR_ACTIVE) { 642 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 643 CEED_USE_POINTER, a); CeedChk(ierr); 644 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 645 CeedChk(ierr); 646 a += size*Q*blksize; // Advance the pointer by the size of the output 647 } 648 } 649 // Apply QFunction 650 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 651 CeedChk(ierr); 652 } 653 } 654 655 // Un-set output Qvecs to prevent accidental overwrite of Assembled 656 for (CeedInt out=0; out<numoutputfields; out++) { 657 // Get output vector 658 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 659 CeedChk(ierr); 660 // Check if active output 661 if (vec == CEED_VECTOR_ACTIVE) { 662 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 663 NULL); CeedChk(ierr); 664 } 665 } 666 667 // Restore input arrays 668 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 669 opinputfields, true, impl); CeedChk(ierr); 670 671 // Output blocked restriction 672 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 673 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 674 CeedElemRestriction blkrstr; 675 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize, 676 numactivein*numactiveout, numactivein*numactiveout*numelements*Q, 677 strides, &blkrstr); CeedChk(ierr); 678 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled, 679 request); CeedChk(ierr); 680 681 // Cleanup 682 for (CeedInt i=0; i<numactivein; i++) { 683 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 684 } 685 ierr = CeedFree(&activein); CeedChk(ierr); 686 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 687 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 688 689 return 0; 690 } 691 692 //------------------------------------------------------------------------------ 693 // Operator Destroy 694 //------------------------------------------------------------------------------ 695 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 696 int ierr; 697 CeedOperator_Blocked *impl; 698 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 699 700 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 701 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 702 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 703 } 704 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 705 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 706 ierr = CeedFree(&impl->edata); CeedChk(ierr); 707 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 708 709 for (CeedInt i=0; i<impl->numein; i++) { 710 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 711 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 712 } 713 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 714 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 715 716 for (CeedInt i=0; i<impl->numeout; i++) { 717 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 718 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 719 } 720 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 721 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 722 723 ierr = CeedFree(&impl); CeedChk(ierr); 724 return 0; 725 } 726 727 //------------------------------------------------------------------------------ 728 // Operator Create 729 //------------------------------------------------------------------------------ 730 int CeedOperatorCreate_Blocked(CeedOperator op) { 731 int ierr; 732 Ceed ceed; 733 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 734 CeedOperator_Blocked *impl; 735 736 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 737 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 738 739 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 740 CeedOperatorLinearAssembleQFunction_Blocked); 741 CeedChk(ierr); 742 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 743 CeedOperatorApply_Blocked); CeedChk(ierr); 744 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 745 CeedOperatorDestroy_Blocked); CeedChk(ierr); 746 return 0; 747 } 748 //------------------------------------------------------------------------------ 749