1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-blocked.h" 18 #include "../ref/ceed-ref.h" 19 20 //------------------------------------------------------------------------------ 21 // Setup Input/Output Fields 22 //------------------------------------------------------------------------------ 23 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 24 CeedOperator op, bool inOrOut, 25 CeedElemRestriction *blkrestr, 26 CeedVector *fullevecs, CeedVector *evecs, 27 CeedVector *qvecs, CeedInt starte, 28 CeedInt numfields, CeedInt Q) { 29 CeedInt dim, ierr, ncomp, size, P; 30 Ceed ceed; 31 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 32 CeedBasis basis; 33 CeedElemRestriction r; 34 CeedOperatorField *opfields; 35 CeedQFunctionField *qffields; 36 if (inOrOut) { 37 ierr = CeedOperatorGetFields(op, NULL, &opfields); 38 CeedChk(ierr); 39 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 40 CeedChk(ierr); 41 } else { 42 ierr = CeedOperatorGetFields(op, &opfields, NULL); 43 CeedChk(ierr); 44 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 45 CeedChk(ierr); 46 } 47 const CeedInt blksize = 8; 48 49 // Loop over fields 50 for (CeedInt i=0; i<numfields; i++) { 51 CeedEvalMode emode; 52 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 53 54 if (emode != CEED_EVAL_WEIGHT) { 55 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 56 CeedChk(ierr); 57 CeedElemRestriction_Ref *data; 58 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 59 Ceed ceed; 60 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 61 CeedInt nelem, elemsize, lsize, compstride; 62 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 63 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 64 ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr); 65 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 66 if (data->offsets) { 67 ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr); 68 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 69 blksize, ncomp, compstride, 70 lsize, CEED_MEM_HOST, 71 CEED_COPY_VALUES, data->offsets, 72 &blkrestr[i+starte]); 73 CeedChk(ierr); 74 } else { 75 CeedInt strides[3]; 76 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 77 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize, 78 blksize, ncomp, lsize, strides, &blkrestr[i+starte]); 79 CeedChk(ierr); 80 } 81 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 82 &fullevecs[i+starte]); 83 CeedChk(ierr); 84 } 85 86 switch(emode) { 87 case CEED_EVAL_NONE: 88 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 89 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 90 break; 91 case CEED_EVAL_INTERP: 92 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 93 ierr = CeedElemRestrictionGetElementSize(r, &P); 94 CeedChk(ierr); 95 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 96 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 97 break; 98 case CEED_EVAL_GRAD: 99 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 100 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 101 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 102 ierr = CeedElemRestrictionGetElementSize(r, &P); 103 CeedChk(ierr); 104 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 105 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 106 break; 107 case CEED_EVAL_WEIGHT: // Only on input fields 108 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 109 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 110 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 111 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 112 CeedChk(ierr); 113 114 break; 115 case CEED_EVAL_DIV: 116 break; // Not implemented 117 case CEED_EVAL_CURL: 118 break; // Not implemented 119 } 120 } 121 return 0; 122 } 123 124 //------------------------------------------------------------------------------ 125 // Setup Operator 126 //------------------------------------------------------------------------------ 127 static int CeedOperatorSetup_Blocked(CeedOperator op) { 128 int ierr; 129 bool setupdone; 130 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 131 if (setupdone) return 0; 132 Ceed ceed; 133 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 134 CeedOperator_Blocked *impl; 135 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 136 CeedQFunction qf; 137 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 138 CeedInt Q, numinputfields, numoutputfields; 139 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 140 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 141 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 142 CeedChk(ierr); 143 CeedOperatorField *opinputfields, *opoutputfields; 144 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 145 CeedChk(ierr); 146 CeedQFunctionField *qfinputfields, *qfoutputfields; 147 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 148 CeedChk(ierr); 149 150 // Allocate 151 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 152 CeedChk(ierr); 153 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 154 CeedChk(ierr); 155 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 156 CeedChk(ierr); 157 158 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 159 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 160 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 161 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 162 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 163 164 impl->numein = numinputfields; impl->numeout = numoutputfields; 165 166 // Set up infield and outfield pointer arrays 167 // Infields 168 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 169 impl->evecs, impl->evecsin, 170 impl->qvecsin, 0, 171 numinputfields, Q); 172 CeedChk(ierr); 173 // Outfields 174 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 175 impl->evecs, impl->evecsout, 176 impl->qvecsout, numinputfields, 177 numoutputfields, Q); 178 CeedChk(ierr); 179 180 // Identity QFunctions 181 if (impl->identityqf) { 182 CeedEvalMode inmode, outmode; 183 CeedQFunctionField *infields, *outfields; 184 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 185 186 for (CeedInt i=0; i<numinputfields; i++) { 187 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 188 CeedChk(ierr); 189 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 190 CeedChk(ierr); 191 192 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 193 impl->qvecsout[i] = impl->qvecsin[i]; 194 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 195 } 196 } 197 198 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 199 200 return 0; 201 } 202 203 //------------------------------------------------------------------------------ 204 // Setup Operator Inputs 205 //------------------------------------------------------------------------------ 206 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 207 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 208 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 209 CeedRequest *request) { 210 CeedInt ierr; 211 CeedEvalMode emode; 212 CeedVector vec; 213 uint64_t state; 214 215 for (CeedInt i=0; i<numinputfields; i++) { 216 // Get input vector 217 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 218 if (vec == CEED_VECTOR_ACTIVE) { 219 if (skipactive) 220 continue; 221 else 222 vec = invec; 223 } 224 225 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 226 CeedChk(ierr); 227 if (emode == CEED_EVAL_WEIGHT) { // Skip 228 } else { 229 // Restrict 230 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 231 if (state != impl->inputstate[i] || vec == invec) { 232 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 233 vec, impl->evecs[i], request); 234 CeedChk(ierr); 235 impl->inputstate[i] = state; 236 } 237 // Get evec 238 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 239 (const CeedScalar **) &impl->edata[i]); 240 CeedChk(ierr); 241 } 242 } 243 return 0; 244 } 245 246 //------------------------------------------------------------------------------ 247 // Input Basis Action 248 //------------------------------------------------------------------------------ 249 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 250 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 251 CeedInt numinputfields, CeedInt blksize, bool skipactive, 252 CeedOperator_Blocked *impl) { 253 CeedInt ierr; 254 CeedInt dim, elemsize, size; 255 CeedElemRestriction Erestrict; 256 CeedEvalMode emode; 257 CeedBasis basis; 258 259 for (CeedInt i=0; i<numinputfields; i++) { 260 // Skip active input 261 if (skipactive) { 262 CeedVector vec; 263 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 264 if (vec == CEED_VECTOR_ACTIVE) 265 continue; 266 } 267 268 // Get elemsize, emode, size 269 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 270 CeedChk(ierr); 271 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 272 CeedChk(ierr); 273 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 274 CeedChk(ierr); 275 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 276 // Basis action 277 switch(emode) { 278 case CEED_EVAL_NONE: 279 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 280 CEED_USE_POINTER, 281 &impl->edata[i][e*Q*size]); CeedChk(ierr); 282 break; 283 case CEED_EVAL_INTERP: 284 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 285 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 286 CEED_USE_POINTER, 287 &impl->edata[i][e*elemsize*size]); 288 CeedChk(ierr); 289 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 290 CEED_EVAL_INTERP, impl->evecsin[i], 291 impl->qvecsin[i]); CeedChk(ierr); 292 break; 293 case CEED_EVAL_GRAD: 294 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 295 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 296 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 297 CEED_USE_POINTER, 298 &impl->edata[i][e*elemsize*size/dim]); 299 CeedChk(ierr); 300 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 301 CEED_EVAL_GRAD, impl->evecsin[i], 302 impl->qvecsin[i]); CeedChk(ierr); 303 break; 304 case CEED_EVAL_WEIGHT: 305 break; // No action 306 // LCOV_EXCL_START 307 case CEED_EVAL_DIV: 308 case CEED_EVAL_CURL: { 309 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 310 CeedChk(ierr); 311 Ceed ceed; 312 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 313 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 314 // LCOV_EXCL_STOP 315 } 316 } 317 } 318 return 0; 319 } 320 321 //------------------------------------------------------------------------------ 322 // Output Basis Action 323 //------------------------------------------------------------------------------ 324 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 325 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 326 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 327 CeedOperator op, CeedOperator_Blocked *impl) { 328 CeedInt ierr; 329 CeedInt dim, elemsize, size; 330 CeedElemRestriction Erestrict; 331 CeedEvalMode emode; 332 CeedBasis basis; 333 334 for (CeedInt i=0; i<numoutputfields; i++) { 335 // Get elemsize, emode, size 336 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 337 CeedChk(ierr); 338 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 339 CeedChk(ierr); 340 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 341 CeedChk(ierr); 342 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 343 // Basis action 344 switch(emode) { 345 case CEED_EVAL_NONE: 346 break; // No action 347 case CEED_EVAL_INTERP: 348 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 349 CeedChk(ierr); 350 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 351 CEED_USE_POINTER, 352 &impl->edata[i + numinputfields][e*elemsize*size]); 353 CeedChk(ierr); 354 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 355 CEED_EVAL_INTERP, impl->qvecsout[i], 356 impl->evecsout[i]); CeedChk(ierr); 357 break; 358 case CEED_EVAL_GRAD: 359 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 360 CeedChk(ierr); 361 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 362 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 363 CEED_USE_POINTER, 364 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 365 CeedChk(ierr); 366 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 367 CEED_EVAL_GRAD, impl->qvecsout[i], 368 impl->evecsout[i]); CeedChk(ierr); 369 break; 370 // LCOV_EXCL_START 371 case CEED_EVAL_WEIGHT: { 372 Ceed ceed; 373 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 374 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 375 "evaluation mode"); 376 } 377 case CEED_EVAL_DIV: 378 case CEED_EVAL_CURL: { 379 Ceed ceed; 380 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 381 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 382 // LCOV_EXCL_STOP 383 } 384 } 385 } 386 return 0; 387 } 388 389 //------------------------------------------------------------------------------ 390 // Restore Input Vectors 391 //------------------------------------------------------------------------------ 392 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 393 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 394 bool skipactive, CeedOperator_Blocked *impl) { 395 CeedInt ierr; 396 CeedEvalMode emode; 397 398 for (CeedInt i=0; i<numinputfields; i++) { 399 // Skip active inputs 400 if (skipactive) { 401 CeedVector vec; 402 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 403 if (vec == CEED_VECTOR_ACTIVE) 404 continue; 405 } 406 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 407 CeedChk(ierr); 408 if (emode == CEED_EVAL_WEIGHT) { // Skip 409 } else { 410 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 411 (const CeedScalar **) &impl->edata[i]); 412 CeedChk(ierr); 413 } 414 } 415 return 0; 416 } 417 418 //------------------------------------------------------------------------------ 419 // Operator Apply 420 //------------------------------------------------------------------------------ 421 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 422 CeedVector outvec, 423 CeedRequest *request) { 424 int ierr; 425 CeedOperator_Blocked *impl; 426 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 427 const CeedInt blksize = 8; 428 CeedInt Q, numinputfields, numoutputfields, numelements, size; 429 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 430 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 431 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 432 CeedQFunction qf; 433 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 434 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 435 CeedChk(ierr); 436 CeedOperatorField *opinputfields, *opoutputfields; 437 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 438 CeedChk(ierr); 439 CeedQFunctionField *qfinputfields, *qfoutputfields; 440 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 441 CeedChk(ierr); 442 CeedEvalMode emode; 443 CeedVector vec; 444 445 // Setup 446 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 447 448 // Input Evecs and Restriction 449 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 450 opinputfields, invec, false, impl, 451 request); CeedChk(ierr); 452 453 // Output Evecs 454 for (CeedInt i=0; i<numoutputfields; i++) { 455 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 456 &impl->edata[i + numinputfields]); CeedChk(ierr); 457 } 458 459 // Loop through elements 460 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 461 // Output pointers 462 for (CeedInt i=0; i<numoutputfields; i++) { 463 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 464 CeedChk(ierr); 465 if (emode == CEED_EVAL_NONE) { 466 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 467 CeedChk(ierr); 468 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 469 CEED_USE_POINTER, 470 &impl->edata[i + numinputfields][e*Q*size]); 471 CeedChk(ierr); 472 } 473 } 474 475 // Input basis apply 476 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 477 numinputfields, blksize, false, impl); 478 CeedChk(ierr); 479 480 // Q function 481 if (!impl->identityqf) { 482 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 483 CeedChk(ierr); 484 } 485 486 // Output basis apply 487 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 488 blksize, numinputfields, 489 numoutputfields, op, impl); 490 CeedChk(ierr); 491 } 492 493 // Output restriction 494 for (CeedInt i=0; i<numoutputfields; i++) { 495 // Restore evec 496 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 497 &impl->edata[i + numinputfields]); CeedChk(ierr); 498 // Get output vector 499 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 500 // Active 501 if (vec == CEED_VECTOR_ACTIVE) 502 vec = outvec; 503 // Restrict 504 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], 505 CEED_TRANSPOSE, impl->evecs[i+impl->numein], 506 vec, request); CeedChk(ierr); 507 508 } 509 510 // Restore input arrays 511 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 512 opinputfields, false, impl); CeedChk(ierr); 513 514 return 0; 515 } 516 517 //------------------------------------------------------------------------------ 518 // Assemble Linear QFunction 519 //------------------------------------------------------------------------------ 520 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op, 521 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 522 int ierr; 523 CeedOperator_Blocked *impl; 524 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 525 const CeedInt blksize = 8; 526 CeedInt Q, numinputfields, numoutputfields, numelements, size; 527 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 528 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 529 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 530 CeedQFunction qf; 531 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 532 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 533 CeedChk(ierr); 534 CeedOperatorField *opinputfields, *opoutputfields; 535 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 536 CeedChk(ierr); 537 CeedQFunctionField *qfinputfields, *qfoutputfields; 538 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 539 CeedChk(ierr); 540 CeedVector vec, lvec; 541 CeedInt numactivein = 0, numactiveout = 0; 542 CeedVector *activein = NULL; 543 CeedScalar *a, *tmp; 544 Ceed ceed; 545 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 546 547 // Setup 548 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 549 550 // Check for identity 551 if (impl->identityqf) 552 // LCOV_EXCL_START 553 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 554 // LCOV_EXCL_STOP 555 556 // Input Evecs and Restriction 557 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 558 opinputfields, NULL, true, impl, 559 request); CeedChk(ierr); 560 561 // Count number of active input fields 562 for (CeedInt i=0; i<numinputfields; i++) { 563 // Get input vector 564 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 565 // Check if active input 566 if (vec == CEED_VECTOR_ACTIVE) { 567 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 568 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 569 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 570 CeedChk(ierr); 571 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 572 for (CeedInt field=0; field<size; field++) { 573 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 574 CeedChk(ierr); 575 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 576 CEED_USE_POINTER, &tmp[field*Q*blksize]); 577 CeedChk(ierr); 578 } 579 numactivein += size; 580 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 581 } 582 } 583 584 // Count number of active output fields 585 for (CeedInt i=0; i<numoutputfields; i++) { 586 // Get output vector 587 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 588 // Check if active output 589 if (vec == CEED_VECTOR_ACTIVE) { 590 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 591 numactiveout += size; 592 } 593 } 594 595 // Check sizes 596 if (!numactivein || !numactiveout) 597 // LCOV_EXCL_START 598 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 599 "and outputs"); 600 // LCOV_EXCL_STOP 601 602 // Setup lvec 603 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 604 &lvec); CeedChk(ierr); 605 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 606 607 // Create output restriction 608 CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q}; 609 ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q, 610 numactivein*numactiveout, 611 numactivein*numactiveout*numelements*Q, 612 strides, rstr); CeedChk(ierr); 613 // Create assembled vector 614 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 615 assembled); CeedChk(ierr); 616 617 // Loop through elements 618 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 619 // Input basis apply 620 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 621 numinputfields, blksize, true, impl); 622 CeedChk(ierr); 623 624 // Assemble QFunction 625 for (CeedInt in=0; in<numactivein; in++) { 626 // Set Inputs 627 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 628 if (numactivein > 1) { 629 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 630 0.0); CeedChk(ierr); 631 } 632 // Set Outputs 633 for (CeedInt out=0; out<numoutputfields; out++) { 634 // Get output vector 635 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 636 CeedChk(ierr); 637 // Check if active output 638 if (vec == CEED_VECTOR_ACTIVE) { 639 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 640 CEED_USE_POINTER, a); CeedChk(ierr); 641 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 642 CeedChk(ierr); 643 a += size*Q*blksize; // Advance the pointer by the size of the output 644 } 645 } 646 // Apply QFunction 647 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 648 CeedChk(ierr); 649 } 650 } 651 652 // Un-set output Qvecs to prevent accidental overwrite of Assembled 653 for (CeedInt out=0; out<numoutputfields; out++) { 654 // Get output vector 655 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 656 CeedChk(ierr); 657 // Check if active output 658 if (vec == CEED_VECTOR_ACTIVE) { 659 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 660 NULL); CeedChk(ierr); 661 } 662 } 663 664 // Restore input arrays 665 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 666 opinputfields, true, impl); CeedChk(ierr); 667 668 // Output blocked restriction 669 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 670 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 671 CeedElemRestriction blkrstr; 672 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize, 673 numactivein*numactiveout, numactivein*numactiveout*numelements*Q, 674 strides, &blkrstr); CeedChk(ierr); 675 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled, 676 request); CeedChk(ierr); 677 678 // Cleanup 679 for (CeedInt i=0; i<numactivein; i++) { 680 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 681 } 682 ierr = CeedFree(&activein); CeedChk(ierr); 683 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 684 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 685 686 return 0; 687 } 688 689 //------------------------------------------------------------------------------ 690 // Operator Destroy 691 //------------------------------------------------------------------------------ 692 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 693 int ierr; 694 CeedOperator_Blocked *impl; 695 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 696 697 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 698 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 699 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 700 } 701 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 702 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 703 ierr = CeedFree(&impl->edata); CeedChk(ierr); 704 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 705 706 for (CeedInt i=0; i<impl->numein; i++) { 707 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 708 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 709 } 710 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 711 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 712 713 for (CeedInt i=0; i<impl->numeout; i++) { 714 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 715 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 716 } 717 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 718 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 719 720 ierr = CeedFree(&impl); CeedChk(ierr); 721 return 0; 722 } 723 724 //------------------------------------------------------------------------------ 725 // Operator Create 726 //------------------------------------------------------------------------------ 727 int CeedOperatorCreate_Blocked(CeedOperator op) { 728 int ierr; 729 Ceed ceed; 730 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 731 CeedOperator_Blocked *impl; 732 733 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 734 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 735 736 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 737 CeedOperatorAssembleLinearQFunction_Blocked); 738 CeedChk(ierr); 739 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 740 CeedOperatorApply_Blocked); CeedChk(ierr); 741 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 742 CeedOperatorDestroy_Blocked); CeedChk(ierr); 743 return 0; 744 } 745 //------------------------------------------------------------------------------ 746