1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <string.h> 18 #include "ceed-opt.h" 19 #include "../ref/ceed-ref.h" 20 21 //------------------------------------------------------------------------------ 22 // Setup Input/Output Fields 23 //------------------------------------------------------------------------------ 24 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 25 bool inOrOut, const CeedInt blksize, 26 CeedElemRestriction *blkrestr, 27 CeedVector *fullevecs, CeedVector *evecs, 28 CeedVector *qvecs, CeedInt starte, 29 CeedInt numfields, CeedInt Q) { 30 CeedInt dim, ierr, ncomp, size, P; 31 Ceed ceed; 32 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 33 CeedBasis basis; 34 CeedElemRestriction r; 35 CeedOperatorField *opfields; 36 CeedQFunctionField *qffields; 37 if (inOrOut) { 38 ierr = CeedOperatorGetFields(op, NULL, &opfields); 39 CeedChk(ierr); 40 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 41 CeedChk(ierr); 42 } else { 43 ierr = CeedOperatorGetFields(op, &opfields, NULL); 44 CeedChk(ierr); 45 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 46 CeedChk(ierr); 47 } 48 49 // Loop over fields 50 for (CeedInt i=0; i<numfields; i++) { 51 CeedEvalMode emode; 52 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 53 54 if (emode != CEED_EVAL_WEIGHT) { 55 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 56 CeedChk(ierr); 57 CeedElemRestriction_Ref *data; 58 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 59 Ceed ceed; 60 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 61 CeedInt nelem, elemsize, nnodes; 62 CeedInterlaceMode imode; 63 ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr); 64 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 65 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 66 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 67 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 68 ierr = CeedElemRestrictionCreateBlocked(ceed, imode, nelem, elemsize, 69 blksize, nnodes, ncomp, 70 CEED_MEM_HOST, CEED_COPY_VALUES, 71 data->indices, &blkrestr[i+starte]); 72 CeedChk(ierr); 73 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 74 &fullevecs[i+starte]); 75 CeedChk(ierr); 76 } 77 78 switch(emode) { 79 case CEED_EVAL_NONE: 80 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 81 ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr); 82 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 83 break; 84 case CEED_EVAL_INTERP: 85 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 86 ierr = CeedElemRestrictionGetElementSize(r, &P); 87 CeedChk(ierr); 88 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 89 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 90 break; 91 case CEED_EVAL_GRAD: 92 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 93 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 94 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 95 ierr = CeedElemRestrictionGetElementSize(r, &P); 96 CeedChk(ierr); 97 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 98 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 99 break; 100 case CEED_EVAL_WEIGHT: // Only on input fields 101 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 102 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 103 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 104 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 105 CeedChk(ierr); 106 107 break; 108 case CEED_EVAL_DIV: 109 break; // Not implimented 110 case CEED_EVAL_CURL: 111 break; // Not implimented 112 } 113 } 114 return 0; 115 } 116 117 //------------------------------------------------------------------------------ 118 // Setup Operator 119 //------------------------------------------------------------------------------ 120 static int CeedOperatorSetup_Opt(CeedOperator op) { 121 int ierr; 122 bool setupdone; 123 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 124 if (setupdone) return 0; 125 Ceed ceed; 126 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 127 Ceed_Opt *ceedimpl; 128 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 129 const CeedInt blksize = ceedimpl->blksize; 130 CeedOperator_Opt *impl; 131 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 132 CeedQFunction qf; 133 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 134 CeedInt Q, numinputfields, numoutputfields; 135 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 136 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 137 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 138 CeedChk(ierr); 139 CeedOperatorField *opinputfields, *opoutputfields; 140 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 141 CeedChk(ierr); 142 CeedQFunctionField *qfinputfields, *qfoutputfields; 143 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 144 CeedChk(ierr); 145 146 // Allocate 147 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 148 CeedChk(ierr); 149 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 150 CeedChk(ierr); 151 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 152 CeedChk(ierr); 153 154 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 155 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 156 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 157 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 158 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 159 160 impl->numein = numinputfields; impl->numeout = numoutputfields; 161 162 // Set up infield and outfield pointer arrays 163 // Infields 164 ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr, 165 impl->evecs, impl->evecsin, 166 impl->qvecsin, 0, 167 numinputfields, Q); 168 CeedChk(ierr); 169 // Outfields 170 ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr, 171 impl->evecs, impl->evecsout, 172 impl->qvecsout, numinputfields, 173 numoutputfields, Q); 174 CeedChk(ierr); 175 176 // Identity QFunctions 177 if (impl->identityqf) { 178 CeedEvalMode inmode, outmode; 179 CeedQFunctionField *infields, *outfields; 180 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 181 182 for (CeedInt i=0; i<numinputfields; i++) { 183 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 184 CeedChk(ierr); 185 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 186 CeedChk(ierr); 187 188 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 189 impl->qvecsout[i] = impl->qvecsin[i]; 190 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 191 } 192 } 193 194 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 195 196 return 0; 197 } 198 199 //------------------------------------------------------------------------------ 200 // Setup Input Fields 201 //------------------------------------------------------------------------------ 202 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields, 203 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 204 CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) { 205 CeedInt ierr; 206 CeedEvalMode emode; 207 CeedVector vec; 208 uint64_t state; 209 210 for (CeedInt i=0; i<numinputfields; i++) { 211 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 212 CeedChk(ierr); 213 if (emode == CEED_EVAL_WEIGHT) { // Skip 214 } else { 215 // Get input vector 216 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 217 if (vec != CEED_VECTOR_ACTIVE) { 218 // Restrict 219 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 220 if (state != impl->inputstate[i]) { 221 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 222 vec, impl->evecs[i], request); 223 CeedChk(ierr); 224 impl->inputstate[i] = state; 225 } 226 } else { 227 // Set Qvec for CEED_EVAL_NONE 228 if (emode == CEED_EVAL_NONE) { 229 ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST, 230 &impl->edata[i]); CeedChk(ierr); 231 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 232 CEED_USE_POINTER, 233 impl->edata[i]); CeedChk(ierr); 234 ierr = CeedVectorRestoreArray(impl->evecsin[i], 235 &impl->edata[i]); CeedChk(ierr); 236 } 237 } 238 // Get evec 239 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 240 (const CeedScalar **) &impl->edata[i]); 241 CeedChk(ierr); 242 } 243 } 244 return 0; 245 } 246 247 //------------------------------------------------------------------------------ 248 // Input Basis Action 249 //------------------------------------------------------------------------------ 250 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 251 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 252 CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive, 253 CeedOperator_Opt *impl, CeedRequest *request) { 254 CeedInt ierr; 255 CeedInt dim, elemsize, size; 256 CeedElemRestriction Erestrict; 257 CeedEvalMode emode; 258 CeedBasis basis; 259 CeedVector vec; 260 261 for (CeedInt i=0; i<numinputfields; i++) { 262 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 263 // Skip active input 264 if (skipactive) { 265 if (vec == CEED_VECTOR_ACTIVE) 266 continue; 267 } 268 269 CeedInt activein = 0; 270 // Get elemsize, emode, size 271 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 272 CeedChk(ierr); 273 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 274 CeedChk(ierr); 275 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 276 CeedChk(ierr); 277 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 278 // Restrict block active input 279 if (vec == CEED_VECTOR_ACTIVE) { 280 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize, 281 CEED_NOTRANSPOSE, invec, 282 impl->evecsin[i], request); 283 CeedChk(ierr); 284 activein = 1; 285 } 286 // Basis action 287 switch(emode) { 288 case CEED_EVAL_NONE: 289 if (!activein) { 290 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 291 CEED_USE_POINTER, 292 &impl->edata[i][e*Q*size]); CeedChk(ierr); 293 } 294 break; 295 case CEED_EVAL_INTERP: 296 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 297 CeedChk(ierr); 298 if (!activein) { 299 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 300 CEED_USE_POINTER, 301 &impl->edata[i][e*elemsize*size]); 302 CeedChk(ierr); 303 } 304 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 305 CEED_EVAL_INTERP, impl->evecsin[i], 306 impl->qvecsin[i]); CeedChk(ierr); 307 break; 308 case CEED_EVAL_GRAD: 309 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 310 CeedChk(ierr); 311 if (!activein) { 312 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 313 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 314 CEED_USE_POINTER, 315 &impl->edata[i][e*elemsize*size/dim]); 316 CeedChk(ierr); 317 } 318 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 319 CEED_EVAL_GRAD, impl->evecsin[i], 320 impl->qvecsin[i]); CeedChk(ierr); 321 break; 322 case CEED_EVAL_WEIGHT: 323 break; // No action 324 case CEED_EVAL_DIV: 325 case CEED_EVAL_CURL: { 326 // LCOV_EXCL_START 327 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 328 CeedChk(ierr); 329 Ceed ceed; 330 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 331 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 332 // LCOV_EXCL_STOP 333 break; // Not implemented 334 } 335 } 336 } 337 return 0; 338 } 339 340 //------------------------------------------------------------------------------ 341 // Output Basis Action 342 //------------------------------------------------------------------------------ 343 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 344 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 345 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 346 CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl, 347 CeedRequest *request) { 348 CeedInt ierr; 349 CeedElemRestriction Erestrict; 350 CeedEvalMode emode; 351 CeedBasis basis; 352 CeedVector vec; 353 354 for (CeedInt i=0; i<numoutputfields; i++) { 355 // Get elemsize, emode, size 356 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 357 CeedChk(ierr); 358 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 359 CeedChk(ierr); 360 // Basis action 361 switch(emode) { 362 case CEED_EVAL_NONE: 363 break; // No action 364 case CEED_EVAL_INTERP: 365 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 366 CeedChk(ierr); 367 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 368 CEED_EVAL_INTERP, impl->qvecsout[i], 369 impl->evecsout[i]); CeedChk(ierr); 370 break; 371 case CEED_EVAL_GRAD: 372 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 373 CeedChk(ierr); 374 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 375 CEED_EVAL_GRAD, impl->qvecsout[i], 376 impl->evecsout[i]); CeedChk(ierr); 377 break; 378 case CEED_EVAL_WEIGHT: { 379 // LCOV_EXCL_START 380 Ceed ceed; 381 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 382 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 383 "evaluation mode"); 384 // LCOV_EXCL_STOP 385 break; // Should not occur 386 } 387 case CEED_EVAL_DIV: 388 case CEED_EVAL_CURL: { 389 // LCOV_EXCL_START 390 Ceed ceed; 391 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 392 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 393 // LCOV_EXCL_STOP 394 break; // Not implemented 395 } 396 } 397 // Restrict output block 398 // Get output vector 399 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 400 if (vec == CEED_VECTOR_ACTIVE) 401 vec = outvec; 402 // Restrict 403 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein], 404 e/blksize, CEED_TRANSPOSE, 405 impl->evecsout[i], vec, request); 406 CeedChk(ierr); 407 } 408 return 0; 409 } 410 411 //------------------------------------------------------------------------------ 412 // Restore Input Vectors 413 //------------------------------------------------------------------------------ 414 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields, 415 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 416 bool skipactive, CeedOperator_Opt *impl) { 417 CeedInt ierr; 418 CeedEvalMode emode; 419 420 for (CeedInt i=0; i<numinputfields; i++) { 421 // Skip active inputs 422 if (skipactive) { 423 CeedVector vec; 424 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 425 if (vec == CEED_VECTOR_ACTIVE) 426 continue; 427 } 428 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 429 CeedChk(ierr); 430 if (emode == CEED_EVAL_WEIGHT) { // Skip 431 } else { 432 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 433 (const CeedScalar **) &impl->edata[i]); 434 CeedChk(ierr); 435 } 436 } 437 return 0; 438 } 439 440 //------------------------------------------------------------------------------ 441 // Operator Apply 442 //------------------------------------------------------------------------------ 443 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec, 444 CeedVector outvec, CeedRequest *request) { 445 int ierr; 446 Ceed ceed; 447 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 448 Ceed_Opt *ceedimpl; 449 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 450 CeedInt blksize = ceedimpl->blksize; 451 CeedOperator_Opt *impl; 452 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 453 CeedInt Q, numinputfields, numoutputfields, numelements; 454 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 455 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 456 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 457 CeedQFunction qf; 458 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 459 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 460 CeedChk(ierr); 461 CeedOperatorField *opinputfields, *opoutputfields; 462 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 463 CeedChk(ierr); 464 CeedQFunctionField *qfinputfields, *qfoutputfields; 465 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 466 CeedChk(ierr); 467 CeedEvalMode emode; 468 469 // Setup 470 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 471 472 // Input Evecs and Restriction 473 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 474 opinputfields, invec, impl, request); 475 CeedChk(ierr); 476 477 // Output Lvecs, Evecs, and Qvecs 478 for (CeedInt i=0; i<numoutputfields; i++) { 479 // Set Qvec if needed 480 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 481 CeedChk(ierr); 482 if (emode == CEED_EVAL_NONE) { 483 // Set qvec to single block evec 484 ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST, 485 &impl->edata[i + numinputfields]); 486 CeedChk(ierr); 487 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 488 CEED_USE_POINTER, 489 impl->edata[i + numinputfields]); CeedChk(ierr); 490 ierr = CeedVectorRestoreArray(impl->evecsout[i], 491 &impl->edata[i + numinputfields]); 492 CeedChk(ierr); 493 } 494 } 495 496 // Loop through elements 497 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 498 // Input basis apply 499 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 500 numinputfields, blksize, invec, false, 501 impl, request); CeedChk(ierr); 502 503 // Q function 504 if (!impl->identityqf) { 505 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 506 CeedChk(ierr); 507 } 508 509 // Output basis apply and restrict 510 ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields, 511 blksize, numinputfields, numoutputfields, 512 op, outvec, impl, request); 513 CeedChk(ierr); 514 } 515 516 // Restore input arrays 517 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 518 opinputfields, false, impl); 519 CeedChk(ierr); 520 521 return 0; 522 } 523 524 //------------------------------------------------------------------------------ 525 // Assemble Linear QFunction 526 //------------------------------------------------------------------------------ 527 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op, 528 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 529 int ierr; 530 Ceed ceed; 531 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 532 Ceed_Opt *ceedimpl; 533 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 534 const CeedInt blksize = ceedimpl->blksize; 535 CeedOperator_Opt *impl; 536 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 537 CeedInt Q, numinputfields, numoutputfields, numelements, size; 538 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 539 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 540 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 541 CeedQFunction qf; 542 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 543 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 544 CeedChk(ierr); 545 CeedOperatorField *opinputfields, *opoutputfields; 546 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 547 CeedChk(ierr); 548 CeedQFunctionField *qfinputfields, *qfoutputfields; 549 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 550 CeedChk(ierr); 551 CeedVector vec, lvec; 552 CeedInt numactivein = 0, numactiveout = 0; 553 CeedVector *activein = NULL; 554 CeedScalar *a, *tmp; 555 556 // Setup 557 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 558 559 // Check for identity 560 if (impl->identityqf) 561 // LCOV_EXCL_START 562 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 563 // LCOV_EXCL_STOP 564 565 // Input Evecs and Restriction 566 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 567 opinputfields, NULL, impl, request); 568 CeedChk(ierr); 569 570 // Count number of active input fields 571 for (CeedInt i=0; i<numinputfields; i++) { 572 // Get input vector 573 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 574 // Check if active input 575 if (vec == CEED_VECTOR_ACTIVE) { 576 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 577 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 578 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 579 CeedChk(ierr); 580 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 581 for (CeedInt field=0; field<size; field++) { 582 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 583 CeedChk(ierr); 584 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 585 CEED_USE_POINTER, &tmp[field*Q*blksize]); 586 CeedChk(ierr); 587 } 588 numactivein += size; 589 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 590 } 591 } 592 593 // Count number of active output fields 594 for (CeedInt i=0; i<numoutputfields; i++) { 595 // Get output vector 596 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 597 // Check if active output 598 if (vec == CEED_VECTOR_ACTIVE) { 599 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 600 numactiveout += size; 601 } 602 } 603 604 // Check sizes 605 if (!numactivein || !numactiveout) 606 // LCOV_EXCL_START 607 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 608 "and outputs"); 609 // LCOV_EXCL_STOP 610 611 // Setup lvec 612 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 613 &lvec); CeedChk(ierr); 614 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 615 616 // Create output restriction 617 CeedInterlaceMode imode = CEED_NONINTERLACED; 618 ierr = CeedElemRestrictionCreateIdentity(ceed, imode, numelements, Q, 619 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 620 // Create assembled vector 621 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 622 assembled); CeedChk(ierr); 623 624 // Loop through elements 625 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 626 // Input basis apply 627 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 628 numinputfields, blksize, NULL, true, 629 impl, request); CeedChk(ierr); 630 631 // Assemble QFunction 632 for (CeedInt in=0; in<numactivein; in++) { 633 // Set Inputs 634 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 635 if (numactivein > 1) { 636 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 637 0.0); CeedChk(ierr); 638 } 639 // Set Outputs 640 for (CeedInt out=0; out<numoutputfields; out++) { 641 // Get output vector 642 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 643 CeedChk(ierr); 644 // Check if active output 645 if (vec == CEED_VECTOR_ACTIVE) { 646 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 647 CEED_USE_POINTER, a); CeedChk(ierr); 648 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 649 CeedChk(ierr); 650 a += size*Q*blksize; // Advance the pointer by the size of the output 651 } 652 } 653 // Apply QFunction 654 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 655 CeedChk(ierr); 656 } 657 } 658 659 // Un-set output Qvecs to prevent accidental overwrite of Assembled 660 for (CeedInt out=0; out<numoutputfields; out++) { 661 // Get output vector 662 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 663 CeedChk(ierr); 664 // Check if active output 665 if (vec == CEED_VECTOR_ACTIVE) { 666 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 667 NULL); CeedChk(ierr); 668 } 669 } 670 671 // Restore input arrays 672 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 673 opinputfields, true, impl); 674 CeedChk(ierr); 675 676 // Output blocked restriction 677 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 678 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 679 CeedElemRestriction blkrstr; 680 ierr = CeedElemRestrictionCreateBlocked(ceed, imode, numelements, Q, blksize, 681 numelements*Q, 682 numactivein*numactiveout, 683 CEED_MEM_HOST, CEED_COPY_VALUES, 684 NULL, &blkrstr); CeedChk(ierr); 685 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled, 686 request); CeedChk(ierr); 687 688 // Cleanup 689 for (CeedInt i=0; i<numactivein; i++) { 690 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 691 } 692 ierr = CeedFree(&activein); CeedChk(ierr); 693 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 694 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 695 696 return 0; 697 } 698 699 //------------------------------------------------------------------------------ 700 // Operator Destroy 701 //------------------------------------------------------------------------------ 702 static int CeedOperatorDestroy_Opt(CeedOperator op) { 703 int ierr; 704 CeedOperator_Opt *impl; 705 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 706 707 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 708 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 709 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 710 } 711 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 712 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 713 ierr = CeedFree(&impl->edata); CeedChk(ierr); 714 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 715 716 for (CeedInt i=0; i<impl->numein; i++) { 717 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 718 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 719 } 720 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 721 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 722 723 for (CeedInt i=0; i<impl->numeout; i++) { 724 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 725 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 726 } 727 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 728 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 729 730 ierr = CeedFree(&impl); CeedChk(ierr); 731 return 0; 732 } 733 734 //------------------------------------------------------------------------------ 735 // Operator Create 736 //------------------------------------------------------------------------------ 737 int CeedOperatorCreate_Opt(CeedOperator op) { 738 int ierr; 739 Ceed ceed; 740 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 741 Ceed_Opt *ceedimpl; 742 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 743 CeedInt blksize = ceedimpl->blksize; 744 CeedOperator_Opt *impl; 745 746 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 747 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 748 749 if (blksize != 1 && blksize != 8) 750 // LCOV_EXCL_START 751 return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize); 752 // LCOV_EXCL_STOP 753 754 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 755 CeedOperatorAssembleLinearQFunction_Opt); 756 CeedChk(ierr); 757 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 758 CeedOperatorApply_Opt); CeedChk(ierr); 759 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 760 CeedOperatorDestroy_Opt); CeedChk(ierr); 761 return 0; 762 } 763 //------------------------------------------------------------------------------ 764