1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed.h> 18 #include <ceed-backend.h> 19 #include <stdbool.h> 20 #include <stddef.h> 21 #include <stdint.h> 22 #include "ceed-blocked.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 28 CeedOperator op, bool inOrOut, 29 CeedElemRestriction *blkrestr, 30 CeedVector *fullevecs, CeedVector *evecs, 31 CeedVector *qvecs, CeedInt starte, 32 CeedInt numfields, CeedInt Q) { 33 CeedInt dim, ierr, ncomp, size, P; 34 Ceed ceed; 35 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 36 CeedBasis basis; 37 CeedElemRestriction r; 38 CeedOperatorField *opfields; 39 CeedQFunctionField *qffields; 40 if (inOrOut) { 41 ierr = CeedOperatorGetFields(op, NULL, &opfields); 42 CeedChk(ierr); 43 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 44 CeedChk(ierr); 45 } else { 46 ierr = CeedOperatorGetFields(op, &opfields, NULL); 47 CeedChk(ierr); 48 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 49 CeedChk(ierr); 50 } 51 const CeedInt blksize = 8; 52 53 // Loop over fields 54 for (CeedInt i=0; i<numfields; i++) { 55 CeedEvalMode emode; 56 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 57 58 if (emode != CEED_EVAL_WEIGHT) { 59 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 60 CeedChk(ierr); 61 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 62 CeedInt nelem, elemsize, lsize, compstride; 63 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 64 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 65 ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr); 66 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 67 68 bool strided; 69 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChk(ierr); 70 if (strided) { 71 CeedInt strides[3]; 72 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 73 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize, 74 blksize, ncomp, lsize, strides, &blkrestr[i+starte]); 75 CeedChk(ierr); 76 } else { 77 const CeedInt *offsets = NULL; 78 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 79 CeedChk(ierr); 80 ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr); 81 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 82 blksize, ncomp, compstride, 83 lsize, CEED_MEM_HOST, 84 CEED_COPY_VALUES, offsets, 85 &blkrestr[i+starte]); 86 CeedChk(ierr); 87 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChk(ierr); 88 } 89 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 90 &fullevecs[i+starte]); 91 CeedChk(ierr); 92 } 93 94 switch(emode) { 95 case CEED_EVAL_NONE: 96 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 97 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 98 break; 99 case CEED_EVAL_INTERP: 100 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 101 ierr = CeedElemRestrictionGetElementSize(r, &P); 102 CeedChk(ierr); 103 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 104 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 105 break; 106 case CEED_EVAL_GRAD: 107 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 108 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 109 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 110 ierr = CeedElemRestrictionGetElementSize(r, &P); 111 CeedChk(ierr); 112 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 113 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 114 break; 115 case CEED_EVAL_WEIGHT: // Only on input fields 116 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 117 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 118 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 119 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 120 CeedChk(ierr); 121 122 break; 123 case CEED_EVAL_DIV: 124 break; // Not implemented 125 case CEED_EVAL_CURL: 126 break; // Not implemented 127 } 128 } 129 return 0; 130 } 131 132 //------------------------------------------------------------------------------ 133 // Setup Operator 134 //------------------------------------------------------------------------------ 135 static int CeedOperatorSetup_Blocked(CeedOperator op) { 136 int ierr; 137 bool setupdone; 138 ierr = CeedOperatorIsSetupDone(op, &setupdone); CeedChk(ierr); 139 if (setupdone) return 0; 140 Ceed ceed; 141 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 142 CeedOperator_Blocked *impl; 143 ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr); 144 CeedQFunction qf; 145 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 146 CeedInt Q, numinputfields, numoutputfields; 147 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 148 ierr = CeedQFunctionIsIdentity(qf, &impl->identityqf); CeedChk(ierr); 149 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 150 CeedChk(ierr); 151 CeedOperatorField *opinputfields, *opoutputfields; 152 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 153 CeedChk(ierr); 154 CeedQFunctionField *qfinputfields, *qfoutputfields; 155 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 156 CeedChk(ierr); 157 158 // Allocate 159 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 160 CeedChk(ierr); 161 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 162 CeedChk(ierr); 163 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 164 CeedChk(ierr); 165 166 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 167 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 168 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 169 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 170 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 171 172 impl->numein = numinputfields; impl->numeout = numoutputfields; 173 174 // Set up infield and outfield pointer arrays 175 // Infields 176 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 177 impl->evecs, impl->evecsin, 178 impl->qvecsin, 0, 179 numinputfields, Q); 180 CeedChk(ierr); 181 // Outfields 182 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 183 impl->evecs, impl->evecsout, 184 impl->qvecsout, numinputfields, 185 numoutputfields, Q); 186 CeedChk(ierr); 187 188 // Identity QFunctions 189 if (impl->identityqf) { 190 CeedEvalMode inmode, outmode; 191 CeedQFunctionField *infields, *outfields; 192 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 193 194 for (CeedInt i=0; i<numinputfields; i++) { 195 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 196 CeedChk(ierr); 197 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 198 CeedChk(ierr); 199 200 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 201 impl->qvecsout[i] = impl->qvecsin[i]; 202 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 203 } 204 } 205 206 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 207 208 return 0; 209 } 210 211 //------------------------------------------------------------------------------ 212 // Setup Operator Inputs 213 //------------------------------------------------------------------------------ 214 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 215 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 216 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 217 CeedRequest *request) { 218 CeedInt ierr; 219 CeedEvalMode emode; 220 CeedVector vec; 221 uint64_t state; 222 223 for (CeedInt i=0; i<numinputfields; i++) { 224 // Get input vector 225 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 226 if (vec == CEED_VECTOR_ACTIVE) { 227 if (skipactive) 228 continue; 229 else 230 vec = invec; 231 } 232 233 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 234 CeedChk(ierr); 235 if (emode == CEED_EVAL_WEIGHT) { // Skip 236 } else { 237 // Restrict 238 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 239 if (state != impl->inputstate[i] || vec == invec) { 240 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 241 vec, impl->evecs[i], request); 242 CeedChk(ierr); 243 impl->inputstate[i] = state; 244 } 245 // Get evec 246 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 247 (const CeedScalar **) &impl->edata[i]); 248 CeedChk(ierr); 249 } 250 } 251 return 0; 252 } 253 254 //------------------------------------------------------------------------------ 255 // Input Basis Action 256 //------------------------------------------------------------------------------ 257 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 258 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 259 CeedInt numinputfields, CeedInt blksize, bool skipactive, 260 CeedOperator_Blocked *impl) { 261 CeedInt ierr; 262 CeedInt dim, elemsize, size; 263 CeedElemRestriction Erestrict; 264 CeedEvalMode emode; 265 CeedBasis basis; 266 267 for (CeedInt i=0; i<numinputfields; i++) { 268 // Skip active input 269 if (skipactive) { 270 CeedVector vec; 271 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 272 if (vec == CEED_VECTOR_ACTIVE) 273 continue; 274 } 275 276 // Get elemsize, emode, size 277 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 278 CeedChk(ierr); 279 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 280 CeedChk(ierr); 281 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 282 CeedChk(ierr); 283 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 284 // Basis action 285 switch(emode) { 286 case CEED_EVAL_NONE: 287 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 288 CEED_USE_POINTER, 289 &impl->edata[i][e*Q*size]); CeedChk(ierr); 290 break; 291 case CEED_EVAL_INTERP: 292 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 293 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 294 CEED_USE_POINTER, 295 &impl->edata[i][e*elemsize*size]); 296 CeedChk(ierr); 297 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 298 CEED_EVAL_INTERP, impl->evecsin[i], 299 impl->qvecsin[i]); CeedChk(ierr); 300 break; 301 case CEED_EVAL_GRAD: 302 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 303 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 304 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 305 CEED_USE_POINTER, 306 &impl->edata[i][e*elemsize*size/dim]); 307 CeedChk(ierr); 308 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 309 CEED_EVAL_GRAD, impl->evecsin[i], 310 impl->qvecsin[i]); CeedChk(ierr); 311 break; 312 case CEED_EVAL_WEIGHT: 313 break; // No action 314 // LCOV_EXCL_START 315 case CEED_EVAL_DIV: 316 case CEED_EVAL_CURL: { 317 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 318 CeedChk(ierr); 319 Ceed ceed; 320 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 321 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 322 // LCOV_EXCL_STOP 323 } 324 } 325 } 326 return 0; 327 } 328 329 //------------------------------------------------------------------------------ 330 // Output Basis Action 331 //------------------------------------------------------------------------------ 332 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 333 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 334 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 335 CeedOperator op, CeedOperator_Blocked *impl) { 336 CeedInt ierr; 337 CeedInt dim, elemsize, size; 338 CeedElemRestriction Erestrict; 339 CeedEvalMode emode; 340 CeedBasis basis; 341 342 for (CeedInt i=0; i<numoutputfields; i++) { 343 // Get elemsize, emode, size 344 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 345 CeedChk(ierr); 346 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 347 CeedChk(ierr); 348 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 349 CeedChk(ierr); 350 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 351 // Basis action 352 switch(emode) { 353 case CEED_EVAL_NONE: 354 break; // No action 355 case CEED_EVAL_INTERP: 356 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 357 CeedChk(ierr); 358 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 359 CEED_USE_POINTER, 360 &impl->edata[i + numinputfields][e*elemsize*size]); 361 CeedChk(ierr); 362 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 363 CEED_EVAL_INTERP, impl->qvecsout[i], 364 impl->evecsout[i]); CeedChk(ierr); 365 break; 366 case CEED_EVAL_GRAD: 367 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 368 CeedChk(ierr); 369 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 370 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 371 CEED_USE_POINTER, 372 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 373 CeedChk(ierr); 374 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 375 CEED_EVAL_GRAD, impl->qvecsout[i], 376 impl->evecsout[i]); CeedChk(ierr); 377 break; 378 // LCOV_EXCL_START 379 case CEED_EVAL_WEIGHT: { 380 Ceed ceed; 381 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 382 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 383 "evaluation mode"); 384 } 385 case CEED_EVAL_DIV: 386 case CEED_EVAL_CURL: { 387 Ceed ceed; 388 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 389 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 390 // LCOV_EXCL_STOP 391 } 392 } 393 } 394 return 0; 395 } 396 397 //------------------------------------------------------------------------------ 398 // Restore Input Vectors 399 //------------------------------------------------------------------------------ 400 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 401 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 402 bool skipactive, CeedOperator_Blocked *impl) { 403 CeedInt ierr; 404 CeedEvalMode emode; 405 406 for (CeedInt i=0; i<numinputfields; i++) { 407 // Skip active inputs 408 if (skipactive) { 409 CeedVector vec; 410 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 411 if (vec == CEED_VECTOR_ACTIVE) 412 continue; 413 } 414 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 415 CeedChk(ierr); 416 if (emode == CEED_EVAL_WEIGHT) { // Skip 417 } else { 418 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 419 (const CeedScalar **) &impl->edata[i]); 420 CeedChk(ierr); 421 } 422 } 423 return 0; 424 } 425 426 //------------------------------------------------------------------------------ 427 // Operator Apply 428 //------------------------------------------------------------------------------ 429 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector invec, 430 CeedVector outvec, 431 CeedRequest *request) { 432 int ierr; 433 CeedOperator_Blocked *impl; 434 ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr); 435 const CeedInt blksize = 8; 436 CeedInt Q, numinputfields, numoutputfields, numelements, size; 437 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 438 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 439 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 440 CeedQFunction qf; 441 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 442 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 443 CeedChk(ierr); 444 CeedOperatorField *opinputfields, *opoutputfields; 445 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 446 CeedChk(ierr); 447 CeedQFunctionField *qfinputfields, *qfoutputfields; 448 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 449 CeedChk(ierr); 450 CeedEvalMode emode; 451 CeedVector vec; 452 453 // Setup 454 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 455 456 // Input Evecs and Restriction 457 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 458 opinputfields, invec, false, impl, 459 request); CeedChk(ierr); 460 461 // Output Evecs 462 for (CeedInt i=0; i<numoutputfields; i++) { 463 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 464 &impl->edata[i + numinputfields]); CeedChk(ierr); 465 } 466 467 // Loop through elements 468 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 469 // Output pointers 470 for (CeedInt i=0; i<numoutputfields; i++) { 471 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 472 CeedChk(ierr); 473 if (emode == CEED_EVAL_NONE) { 474 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 475 CeedChk(ierr); 476 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 477 CEED_USE_POINTER, 478 &impl->edata[i + numinputfields][e*Q*size]); 479 CeedChk(ierr); 480 } 481 } 482 483 // Input basis apply 484 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 485 numinputfields, blksize, false, impl); 486 CeedChk(ierr); 487 488 // Q function 489 if (!impl->identityqf) { 490 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 491 CeedChk(ierr); 492 } 493 494 // Output basis apply 495 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 496 blksize, numinputfields, 497 numoutputfields, op, impl); 498 CeedChk(ierr); 499 } 500 501 // Output restriction 502 for (CeedInt i=0; i<numoutputfields; i++) { 503 // Restore evec 504 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 505 &impl->edata[i + numinputfields]); CeedChk(ierr); 506 // Get output vector 507 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 508 // Active 509 if (vec == CEED_VECTOR_ACTIVE) 510 vec = outvec; 511 // Restrict 512 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], 513 CEED_TRANSPOSE, impl->evecs[i+impl->numein], 514 vec, request); CeedChk(ierr); 515 516 } 517 518 // Restore input arrays 519 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 520 opinputfields, false, impl); CeedChk(ierr); 521 522 return 0; 523 } 524 525 //------------------------------------------------------------------------------ 526 // Assemble Linear QFunction 527 //------------------------------------------------------------------------------ 528 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op, 529 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 530 int ierr; 531 CeedOperator_Blocked *impl; 532 ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr); 533 const CeedInt blksize = 8; 534 CeedInt Q, numinputfields, numoutputfields, numelements, size; 535 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 536 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 537 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 538 CeedQFunction qf; 539 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 540 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 541 CeedChk(ierr); 542 CeedOperatorField *opinputfields, *opoutputfields; 543 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 544 CeedChk(ierr); 545 CeedQFunctionField *qfinputfields, *qfoutputfields; 546 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 547 CeedChk(ierr); 548 CeedVector vec, lvec; 549 CeedInt numactivein = 0, numactiveout = 0; 550 CeedVector *activein = NULL; 551 CeedScalar *a, *tmp; 552 Ceed ceed; 553 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 554 555 // Setup 556 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 557 558 // Check for identity 559 if (impl->identityqf) 560 // LCOV_EXCL_START 561 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 562 // LCOV_EXCL_STOP 563 564 // Input Evecs and Restriction 565 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 566 opinputfields, NULL, true, impl, 567 request); CeedChk(ierr); 568 569 // Count number of active input fields 570 for (CeedInt i=0; i<numinputfields; i++) { 571 // Get input vector 572 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 573 // Check if active input 574 if (vec == CEED_VECTOR_ACTIVE) { 575 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 576 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 577 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 578 CeedChk(ierr); 579 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 580 for (CeedInt field=0; field<size; field++) { 581 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 582 CeedChk(ierr); 583 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 584 CEED_USE_POINTER, &tmp[field*Q*blksize]); 585 CeedChk(ierr); 586 } 587 numactivein += size; 588 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 589 } 590 } 591 592 // Count number of active output fields 593 for (CeedInt i=0; i<numoutputfields; i++) { 594 // Get output vector 595 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 596 // Check if active output 597 if (vec == CEED_VECTOR_ACTIVE) { 598 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 599 numactiveout += size; 600 } 601 } 602 603 // Check sizes 604 if (!numactivein || !numactiveout) 605 // LCOV_EXCL_START 606 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 607 "and outputs"); 608 // LCOV_EXCL_STOP 609 610 // Setup lvec 611 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 612 &lvec); CeedChk(ierr); 613 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 614 615 // Create output restriction 616 CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q}; 617 ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q, 618 numactivein*numactiveout, 619 numactivein*numactiveout*numelements*Q, 620 strides, rstr); CeedChk(ierr); 621 // Create assembled vector 622 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 623 assembled); CeedChk(ierr); 624 625 // Loop through elements 626 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 627 // Input basis apply 628 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 629 numinputfields, blksize, true, impl); 630 CeedChk(ierr); 631 632 // Assemble QFunction 633 for (CeedInt in=0; in<numactivein; in++) { 634 // Set Inputs 635 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 636 if (numactivein > 1) { 637 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 638 0.0); CeedChk(ierr); 639 } 640 // Set Outputs 641 for (CeedInt out=0; out<numoutputfields; out++) { 642 // Get output vector 643 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 644 CeedChk(ierr); 645 // Check if active output 646 if (vec == CEED_VECTOR_ACTIVE) { 647 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 648 CEED_USE_POINTER, a); CeedChk(ierr); 649 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 650 CeedChk(ierr); 651 a += size*Q*blksize; // Advance the pointer by the size of the output 652 } 653 } 654 // Apply QFunction 655 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 656 CeedChk(ierr); 657 } 658 } 659 660 // Un-set output Qvecs to prevent accidental overwrite of Assembled 661 for (CeedInt out=0; out<numoutputfields; out++) { 662 // Get output vector 663 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 664 CeedChk(ierr); 665 // Check if active output 666 if (vec == CEED_VECTOR_ACTIVE) { 667 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 668 NULL); CeedChk(ierr); 669 } 670 } 671 672 // Restore input arrays 673 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 674 opinputfields, true, impl); CeedChk(ierr); 675 676 // Output blocked restriction 677 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 678 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 679 CeedElemRestriction blkrstr; 680 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize, 681 numactivein*numactiveout, numactivein*numactiveout*numelements*Q, 682 strides, &blkrstr); CeedChk(ierr); 683 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled, 684 request); CeedChk(ierr); 685 686 // Cleanup 687 for (CeedInt i=0; i<numactivein; i++) { 688 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 689 } 690 ierr = CeedFree(&activein); CeedChk(ierr); 691 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 692 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 693 694 return 0; 695 } 696 697 //------------------------------------------------------------------------------ 698 // Operator Destroy 699 //------------------------------------------------------------------------------ 700 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 701 int ierr; 702 CeedOperator_Blocked *impl; 703 ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr); 704 705 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 706 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 707 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 708 } 709 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 710 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 711 ierr = CeedFree(&impl->edata); CeedChk(ierr); 712 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 713 714 for (CeedInt i=0; i<impl->numein; i++) { 715 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 716 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 717 } 718 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 719 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 720 721 for (CeedInt i=0; i<impl->numeout; i++) { 722 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 723 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 724 } 725 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 726 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 727 728 ierr = CeedFree(&impl); CeedChk(ierr); 729 return 0; 730 } 731 732 //------------------------------------------------------------------------------ 733 // Operator Create 734 //------------------------------------------------------------------------------ 735 int CeedOperatorCreate_Blocked(CeedOperator op) { 736 int ierr; 737 Ceed ceed; 738 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 739 CeedOperator_Blocked *impl; 740 741 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 742 ierr = CeedOperatorSetData(op, impl); CeedChk(ierr); 743 744 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 745 CeedOperatorLinearAssembleQFunction_Blocked); 746 CeedChk(ierr); 747 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 748 CeedOperatorApplyAdd_Blocked); CeedChk(ierr); 749 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 750 CeedOperatorDestroy_Blocked); CeedChk(ierr); 751 return 0; 752 } 753 //------------------------------------------------------------------------------ 754