1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <string.h> 18 #include "ceed-opt.h" 19 20 //------------------------------------------------------------------------------ 21 // Setup Input/Output Fields 22 //------------------------------------------------------------------------------ 23 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 24 bool inOrOut, const CeedInt blksize, 25 CeedElemRestriction *blkrestr, 26 CeedVector *fullevecs, CeedVector *evecs, 27 CeedVector *qvecs, CeedInt starte, 28 CeedInt numfields, CeedInt Q) { 29 CeedInt dim, ierr, ncomp, size, P; 30 Ceed ceed; 31 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 32 CeedBasis basis; 33 CeedElemRestriction r; 34 CeedOperatorField *opfields; 35 CeedQFunctionField *qffields; 36 if (inOrOut) { 37 ierr = CeedOperatorGetFields(op, NULL, &opfields); 38 CeedChk(ierr); 39 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 40 CeedChk(ierr); 41 } else { 42 ierr = CeedOperatorGetFields(op, &opfields, NULL); 43 CeedChk(ierr); 44 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 45 CeedChk(ierr); 46 } 47 48 // Loop over fields 49 for (CeedInt i=0; i<numfields; i++) { 50 CeedEvalMode emode; 51 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 52 53 if (emode != CEED_EVAL_WEIGHT) { 54 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 55 CeedChk(ierr); 56 Ceed ceed; 57 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 58 CeedInt nelem, elemsize, lsize, compstride; 59 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 60 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 61 ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr); 62 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 63 64 bool strided; 65 ierr = CeedElemRestrictionGetStridedStatus(r, &strided); CeedChk(ierr); 66 if (strided) { 67 CeedInt strides[3]; 68 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 69 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize, 70 blksize, ncomp, lsize, strides, &blkrestr[i+starte]); 71 CeedChk(ierr); 72 } else { 73 const CeedInt *offsets = NULL; 74 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 75 CeedChk(ierr); 76 ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr); 77 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 78 blksize, ncomp, compstride, 79 lsize, CEED_MEM_HOST, 80 CEED_COPY_VALUES, offsets, 81 &blkrestr[i+starte]); 82 CeedChk(ierr); 83 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChk(ierr); 84 } 85 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 86 &fullevecs[i+starte]); 87 CeedChk(ierr); 88 } 89 90 switch(emode) { 91 case CEED_EVAL_NONE: 92 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 93 ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr); 94 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 95 break; 96 case CEED_EVAL_INTERP: 97 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 98 ierr = CeedElemRestrictionGetElementSize(r, &P); 99 CeedChk(ierr); 100 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 101 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 102 break; 103 case CEED_EVAL_GRAD: 104 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 105 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 106 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 107 ierr = CeedElemRestrictionGetElementSize(r, &P); 108 CeedChk(ierr); 109 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 110 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 111 break; 112 case CEED_EVAL_WEIGHT: // Only on input fields 113 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 114 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 115 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 116 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 117 CeedChk(ierr); 118 119 break; 120 case CEED_EVAL_DIV: 121 break; // Not implimented 122 case CEED_EVAL_CURL: 123 break; // Not implimented 124 } 125 } 126 return 0; 127 } 128 129 //------------------------------------------------------------------------------ 130 // Setup Operator 131 //------------------------------------------------------------------------------ 132 static int CeedOperatorSetup_Opt(CeedOperator op) { 133 int ierr; 134 bool setupdone; 135 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 136 if (setupdone) return 0; 137 Ceed ceed; 138 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 139 Ceed_Opt *ceedimpl; 140 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 141 const CeedInt blksize = ceedimpl->blksize; 142 CeedOperator_Opt *impl; 143 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 144 CeedQFunction qf; 145 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 146 CeedInt Q, numinputfields, numoutputfields; 147 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 148 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 149 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 150 CeedChk(ierr); 151 CeedOperatorField *opinputfields, *opoutputfields; 152 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 153 CeedChk(ierr); 154 CeedQFunctionField *qfinputfields, *qfoutputfields; 155 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 156 CeedChk(ierr); 157 158 // Allocate 159 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 160 CeedChk(ierr); 161 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 162 CeedChk(ierr); 163 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 164 CeedChk(ierr); 165 166 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 167 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 168 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 169 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 170 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 171 172 impl->numein = numinputfields; impl->numeout = numoutputfields; 173 174 // Set up infield and outfield pointer arrays 175 // Infields 176 ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr, 177 impl->evecs, impl->evecsin, 178 impl->qvecsin, 0, 179 numinputfields, Q); 180 CeedChk(ierr); 181 // Outfields 182 ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr, 183 impl->evecs, impl->evecsout, 184 impl->qvecsout, numinputfields, 185 numoutputfields, Q); 186 CeedChk(ierr); 187 188 // Identity QFunctions 189 if (impl->identityqf) { 190 CeedEvalMode inmode, outmode; 191 CeedQFunctionField *infields, *outfields; 192 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 193 194 for (CeedInt i=0; i<numinputfields; i++) { 195 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 196 CeedChk(ierr); 197 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 198 CeedChk(ierr); 199 200 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 201 impl->qvecsout[i] = impl->qvecsin[i]; 202 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 203 } 204 } 205 206 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 207 208 return 0; 209 } 210 211 //------------------------------------------------------------------------------ 212 // Setup Input Fields 213 //------------------------------------------------------------------------------ 214 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields, 215 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 216 CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) { 217 CeedInt ierr; 218 CeedEvalMode emode; 219 CeedVector vec; 220 uint64_t state; 221 222 for (CeedInt i=0; i<numinputfields; i++) { 223 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 224 CeedChk(ierr); 225 if (emode == CEED_EVAL_WEIGHT) { // Skip 226 } else { 227 // Get input vector 228 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 229 if (vec != CEED_VECTOR_ACTIVE) { 230 // Restrict 231 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 232 if (state != impl->inputstate[i]) { 233 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 234 vec, impl->evecs[i], request); 235 CeedChk(ierr); 236 impl->inputstate[i] = state; 237 } 238 } else { 239 // Set Qvec for CEED_EVAL_NONE 240 if (emode == CEED_EVAL_NONE) { 241 ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST, 242 &impl->edata[i]); CeedChk(ierr); 243 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 244 CEED_USE_POINTER, 245 impl->edata[i]); CeedChk(ierr); 246 ierr = CeedVectorRestoreArray(impl->evecsin[i], 247 &impl->edata[i]); CeedChk(ierr); 248 } 249 } 250 // Get evec 251 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 252 (const CeedScalar **) &impl->edata[i]); 253 CeedChk(ierr); 254 } 255 } 256 return 0; 257 } 258 259 //------------------------------------------------------------------------------ 260 // Input Basis Action 261 //------------------------------------------------------------------------------ 262 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 263 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 264 CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive, 265 CeedOperator_Opt *impl, CeedRequest *request) { 266 CeedInt ierr; 267 CeedInt dim, elemsize, size; 268 CeedElemRestriction Erestrict; 269 CeedEvalMode emode; 270 CeedBasis basis; 271 CeedVector vec; 272 273 for (CeedInt i=0; i<numinputfields; i++) { 274 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 275 // Skip active input 276 if (skipactive) { 277 if (vec == CEED_VECTOR_ACTIVE) 278 continue; 279 } 280 281 CeedInt activein = 0; 282 // Get elemsize, emode, size 283 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 284 CeedChk(ierr); 285 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 286 CeedChk(ierr); 287 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 288 CeedChk(ierr); 289 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 290 // Restrict block active input 291 if (vec == CEED_VECTOR_ACTIVE) { 292 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize, 293 CEED_NOTRANSPOSE, invec, 294 impl->evecsin[i], request); 295 CeedChk(ierr); 296 activein = 1; 297 } 298 // Basis action 299 switch(emode) { 300 case CEED_EVAL_NONE: 301 if (!activein) { 302 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 303 CEED_USE_POINTER, 304 &impl->edata[i][e*Q*size]); CeedChk(ierr); 305 } 306 break; 307 case CEED_EVAL_INTERP: 308 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 309 CeedChk(ierr); 310 if (!activein) { 311 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 312 CEED_USE_POINTER, 313 &impl->edata[i][e*elemsize*size]); 314 CeedChk(ierr); 315 } 316 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 317 CEED_EVAL_INTERP, impl->evecsin[i], 318 impl->qvecsin[i]); CeedChk(ierr); 319 break; 320 case CEED_EVAL_GRAD: 321 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 322 CeedChk(ierr); 323 if (!activein) { 324 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 325 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 326 CEED_USE_POINTER, 327 &impl->edata[i][e*elemsize*size/dim]); 328 CeedChk(ierr); 329 } 330 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 331 CEED_EVAL_GRAD, impl->evecsin[i], 332 impl->qvecsin[i]); CeedChk(ierr); 333 break; 334 case CEED_EVAL_WEIGHT: 335 break; // No action 336 // LCOV_EXCL_START 337 case CEED_EVAL_DIV: 338 case CEED_EVAL_CURL: { 339 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 340 CeedChk(ierr); 341 Ceed ceed; 342 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 343 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 344 // LCOV_EXCL_STOP 345 } 346 } 347 } 348 return 0; 349 } 350 351 //------------------------------------------------------------------------------ 352 // Output Basis Action 353 //------------------------------------------------------------------------------ 354 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 355 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 356 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 357 CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl, 358 CeedRequest *request) { 359 CeedInt ierr; 360 CeedElemRestriction Erestrict; 361 CeedEvalMode emode; 362 CeedBasis basis; 363 CeedVector vec; 364 365 for (CeedInt i=0; i<numoutputfields; i++) { 366 // Get elemsize, emode, size 367 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 368 CeedChk(ierr); 369 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 370 CeedChk(ierr); 371 // Basis action 372 switch(emode) { 373 case CEED_EVAL_NONE: 374 break; // No action 375 case CEED_EVAL_INTERP: 376 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 377 CeedChk(ierr); 378 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 379 CEED_EVAL_INTERP, impl->qvecsout[i], 380 impl->evecsout[i]); CeedChk(ierr); 381 break; 382 case CEED_EVAL_GRAD: 383 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 384 CeedChk(ierr); 385 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 386 CEED_EVAL_GRAD, impl->qvecsout[i], 387 impl->evecsout[i]); CeedChk(ierr); 388 break; 389 // LCOV_EXCL_START 390 case CEED_EVAL_WEIGHT: { 391 Ceed ceed; 392 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 393 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 394 "evaluation mode"); 395 } 396 case CEED_EVAL_DIV: 397 case CEED_EVAL_CURL: { 398 Ceed ceed; 399 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 400 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 401 // LCOV_EXCL_STOP 402 } 403 } 404 // Restrict output block 405 // Get output vector 406 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 407 if (vec == CEED_VECTOR_ACTIVE) 408 vec = outvec; 409 // Restrict 410 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein], 411 e/blksize, CEED_TRANSPOSE, 412 impl->evecsout[i], vec, request); 413 CeedChk(ierr); 414 } 415 return 0; 416 } 417 418 //------------------------------------------------------------------------------ 419 // Restore Input Vectors 420 //------------------------------------------------------------------------------ 421 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields, 422 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 423 bool skipactive, CeedOperator_Opt *impl) { 424 CeedInt ierr; 425 CeedEvalMode emode; 426 427 for (CeedInt i=0; i<numinputfields; i++) { 428 // Skip active inputs 429 if (skipactive) { 430 CeedVector vec; 431 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 432 if (vec == CEED_VECTOR_ACTIVE) 433 continue; 434 } 435 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 436 CeedChk(ierr); 437 if (emode == CEED_EVAL_WEIGHT) { // Skip 438 } else { 439 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 440 (const CeedScalar **) &impl->edata[i]); 441 CeedChk(ierr); 442 } 443 } 444 return 0; 445 } 446 447 //------------------------------------------------------------------------------ 448 // Operator Apply 449 //------------------------------------------------------------------------------ 450 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec, 451 CeedVector outvec, CeedRequest *request) { 452 int ierr; 453 Ceed ceed; 454 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 455 Ceed_Opt *ceedimpl; 456 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 457 CeedInt blksize = ceedimpl->blksize; 458 CeedOperator_Opt *impl; 459 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 460 CeedInt Q, numinputfields, numoutputfields, numelements; 461 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 462 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 463 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 464 CeedQFunction qf; 465 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 466 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 467 CeedChk(ierr); 468 CeedOperatorField *opinputfields, *opoutputfields; 469 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 470 CeedChk(ierr); 471 CeedQFunctionField *qfinputfields, *qfoutputfields; 472 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 473 CeedChk(ierr); 474 CeedEvalMode emode; 475 476 // Setup 477 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 478 479 // Input Evecs and Restriction 480 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 481 opinputfields, invec, impl, request); 482 CeedChk(ierr); 483 484 // Output Lvecs, Evecs, and Qvecs 485 for (CeedInt i=0; i<numoutputfields; i++) { 486 // Set Qvec if needed 487 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 488 CeedChk(ierr); 489 if (emode == CEED_EVAL_NONE) { 490 // Set qvec to single block evec 491 ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST, 492 &impl->edata[i + numinputfields]); 493 CeedChk(ierr); 494 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 495 CEED_USE_POINTER, 496 impl->edata[i + numinputfields]); CeedChk(ierr); 497 ierr = CeedVectorRestoreArray(impl->evecsout[i], 498 &impl->edata[i + numinputfields]); 499 CeedChk(ierr); 500 } 501 } 502 503 // Loop through elements 504 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 505 // Input basis apply 506 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 507 numinputfields, blksize, invec, false, 508 impl, request); CeedChk(ierr); 509 510 // Q function 511 if (!impl->identityqf) { 512 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 513 CeedChk(ierr); 514 } 515 516 // Output basis apply and restrict 517 ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields, 518 blksize, numinputfields, numoutputfields, 519 op, outvec, impl, request); 520 CeedChk(ierr); 521 } 522 523 // Restore input arrays 524 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 525 opinputfields, false, impl); 526 CeedChk(ierr); 527 528 return 0; 529 } 530 531 //------------------------------------------------------------------------------ 532 // Assemble Linear QFunction 533 //------------------------------------------------------------------------------ 534 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op, 535 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 536 int ierr; 537 Ceed ceed; 538 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 539 Ceed_Opt *ceedimpl; 540 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 541 const CeedInt blksize = ceedimpl->blksize; 542 CeedOperator_Opt *impl; 543 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 544 CeedInt Q, numinputfields, numoutputfields, numelements, size; 545 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 546 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 547 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 548 CeedQFunction qf; 549 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 550 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 551 CeedChk(ierr); 552 CeedOperatorField *opinputfields, *opoutputfields; 553 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 554 CeedChk(ierr); 555 CeedQFunctionField *qfinputfields, *qfoutputfields; 556 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 557 CeedChk(ierr); 558 CeedVector vec, lvec; 559 CeedInt numactivein = 0, numactiveout = 0; 560 CeedVector *activein = NULL; 561 CeedScalar *a, *tmp; 562 563 // Setup 564 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 565 566 // Check for identity 567 if (impl->identityqf) 568 // LCOV_EXCL_START 569 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 570 // LCOV_EXCL_STOP 571 572 // Input Evecs and Restriction 573 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 574 opinputfields, NULL, impl, request); 575 CeedChk(ierr); 576 577 // Count number of active input fields 578 for (CeedInt i=0; i<numinputfields; i++) { 579 // Get input vector 580 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 581 // Check if active input 582 if (vec == CEED_VECTOR_ACTIVE) { 583 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 584 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 585 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 586 CeedChk(ierr); 587 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 588 for (CeedInt field=0; field<size; field++) { 589 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 590 CeedChk(ierr); 591 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 592 CEED_USE_POINTER, &tmp[field*Q*blksize]); 593 CeedChk(ierr); 594 } 595 numactivein += size; 596 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 597 } 598 } 599 600 // Count number of active output fields 601 for (CeedInt i=0; i<numoutputfields; i++) { 602 // Get output vector 603 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 604 // Check if active output 605 if (vec == CEED_VECTOR_ACTIVE) { 606 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 607 numactiveout += size; 608 } 609 } 610 611 // Check sizes 612 if (!numactivein || !numactiveout) 613 // LCOV_EXCL_START 614 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 615 "and outputs"); 616 // LCOV_EXCL_STOP 617 618 // Setup lvec 619 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 620 &lvec); CeedChk(ierr); 621 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 622 623 // Create output restriction 624 CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q}; 625 ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q, 626 numactivein*numactiveout, 627 numactivein*numactiveout*numelements*Q, 628 strides, rstr); CeedChk(ierr); 629 // Create assembled vector 630 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 631 assembled); CeedChk(ierr); 632 633 // Loop through elements 634 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 635 // Input basis apply 636 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 637 numinputfields, blksize, NULL, true, 638 impl, request); CeedChk(ierr); 639 640 // Assemble QFunction 641 for (CeedInt in=0; in<numactivein; in++) { 642 // Set Inputs 643 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 644 if (numactivein > 1) { 645 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 646 0.0); CeedChk(ierr); 647 } 648 // Set Outputs 649 for (CeedInt out=0; out<numoutputfields; out++) { 650 // Get output vector 651 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 652 CeedChk(ierr); 653 // Check if active output 654 if (vec == CEED_VECTOR_ACTIVE) { 655 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 656 CEED_USE_POINTER, a); CeedChk(ierr); 657 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 658 CeedChk(ierr); 659 a += size*Q*blksize; // Advance the pointer by the size of the output 660 } 661 } 662 // Apply QFunction 663 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 664 CeedChk(ierr); 665 } 666 } 667 668 // Un-set output Qvecs to prevent accidental overwrite of Assembled 669 for (CeedInt out=0; out<numoutputfields; out++) { 670 // Get output vector 671 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 672 CeedChk(ierr); 673 // Check if active output 674 if (vec == CEED_VECTOR_ACTIVE) { 675 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 676 NULL); CeedChk(ierr); 677 } 678 } 679 680 // Restore input arrays 681 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 682 opinputfields, true, impl); 683 CeedChk(ierr); 684 685 // Output blocked restriction 686 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 687 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 688 CeedElemRestriction blkrstr; 689 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize, 690 numactivein*numactiveout, numactivein*numactiveout*numelements*Q, 691 strides, &blkrstr); CeedChk(ierr); 692 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled, 693 request); CeedChk(ierr); 694 695 // Cleanup 696 for (CeedInt i=0; i<numactivein; i++) { 697 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 698 } 699 ierr = CeedFree(&activein); CeedChk(ierr); 700 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 701 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 702 703 return 0; 704 } 705 706 //------------------------------------------------------------------------------ 707 // Operator Destroy 708 //------------------------------------------------------------------------------ 709 static int CeedOperatorDestroy_Opt(CeedOperator op) { 710 int ierr; 711 CeedOperator_Opt *impl; 712 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 713 714 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 715 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 716 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 717 } 718 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 719 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 720 ierr = CeedFree(&impl->edata); CeedChk(ierr); 721 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 722 723 for (CeedInt i=0; i<impl->numein; i++) { 724 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 725 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 726 } 727 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 728 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 729 730 for (CeedInt i=0; i<impl->numeout; i++) { 731 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 732 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 733 } 734 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 735 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 736 737 ierr = CeedFree(&impl); CeedChk(ierr); 738 return 0; 739 } 740 741 //------------------------------------------------------------------------------ 742 // Operator Create 743 //------------------------------------------------------------------------------ 744 int CeedOperatorCreate_Opt(CeedOperator op) { 745 int ierr; 746 Ceed ceed; 747 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 748 Ceed_Opt *ceedimpl; 749 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 750 CeedInt blksize = ceedimpl->blksize; 751 CeedOperator_Opt *impl; 752 753 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 754 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 755 756 if (blksize != 1 && blksize != 8) 757 // LCOV_EXCL_START 758 return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize); 759 // LCOV_EXCL_STOP 760 761 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 762 CeedOperatorAssembleLinearQFunction_Opt); 763 CeedChk(ierr); 764 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 765 CeedOperatorApply_Opt); CeedChk(ierr); 766 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 767 CeedOperatorDestroy_Opt); CeedChk(ierr); 768 return 0; 769 } 770 //------------------------------------------------------------------------------ 771