1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <string.h> 18 #include "ceed-opt.h" 19 #include "../ref/ceed-ref.h" 20 21 //------------------------------------------------------------------------------ 22 // Setup Input/Output Fields 23 //------------------------------------------------------------------------------ 24 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 25 bool inOrOut, const CeedInt blksize, 26 CeedElemRestriction *blkrestr, 27 CeedVector *fullevecs, CeedVector *evecs, 28 CeedVector *qvecs, CeedInt starte, 29 CeedInt numfields, CeedInt Q) { 30 CeedInt dim, ierr, ncomp, size, P; 31 Ceed ceed; 32 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 33 CeedBasis basis; 34 CeedElemRestriction r; 35 CeedOperatorField *opfields; 36 CeedQFunctionField *qffields; 37 if (inOrOut) { 38 ierr = CeedOperatorGetFields(op, NULL, &opfields); 39 CeedChk(ierr); 40 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 41 CeedChk(ierr); 42 } else { 43 ierr = CeedOperatorGetFields(op, &opfields, NULL); 44 CeedChk(ierr); 45 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 46 CeedChk(ierr); 47 } 48 49 // Loop over fields 50 for (CeedInt i=0; i<numfields; i++) { 51 CeedEvalMode emode; 52 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 53 54 if (emode != CEED_EVAL_WEIGHT) { 55 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 56 CeedChk(ierr); 57 CeedElemRestriction_Ref *data; 58 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 59 Ceed ceed; 60 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 61 CeedInt nelem, elemsize, nnodes; 62 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 63 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 64 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 65 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 66 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 67 blksize, nnodes, ncomp, 68 CEED_MEM_HOST, CEED_COPY_VALUES, 69 data->indices, &blkrestr[i+starte]); 70 CeedChk(ierr); 71 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 72 &fullevecs[i+starte]); 73 CeedChk(ierr); 74 } 75 76 switch(emode) { 77 case CEED_EVAL_NONE: 78 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 79 ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr); 80 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 81 break; 82 case CEED_EVAL_INTERP: 83 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 84 ierr = CeedElemRestrictionGetElementSize(r, &P); 85 CeedChk(ierr); 86 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 87 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 88 break; 89 case CEED_EVAL_GRAD: 90 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 91 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 92 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 93 ierr = CeedElemRestrictionGetElementSize(r, &P); 94 CeedChk(ierr); 95 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 96 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 97 break; 98 case CEED_EVAL_WEIGHT: // Only on input fields 99 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 100 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 101 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 102 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 103 CeedChk(ierr); 104 105 break; 106 case CEED_EVAL_DIV: 107 break; // Not implimented 108 case CEED_EVAL_CURL: 109 break; // Not implimented 110 } 111 } 112 return 0; 113 } 114 115 //------------------------------------------------------------------------------ 116 // Setup Operator 117 //------------------------------------------------------------------------------ 118 static int CeedOperatorSetup_Opt(CeedOperator op) { 119 int ierr; 120 bool setupdone; 121 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 122 if (setupdone) return 0; 123 Ceed ceed; 124 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 125 Ceed_Opt *ceedimpl; 126 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 127 const CeedInt blksize = ceedimpl->blksize; 128 CeedOperator_Opt *impl; 129 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 130 CeedQFunction qf; 131 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 132 CeedInt Q, numinputfields, numoutputfields; 133 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 134 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 135 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 136 CeedChk(ierr); 137 CeedOperatorField *opinputfields, *opoutputfields; 138 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 139 CeedChk(ierr); 140 CeedQFunctionField *qfinputfields, *qfoutputfields; 141 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 142 CeedChk(ierr); 143 144 // Allocate 145 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 146 CeedChk(ierr); 147 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 148 CeedChk(ierr); 149 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 150 CeedChk(ierr); 151 152 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 153 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 154 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 155 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 156 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 157 158 impl->numein = numinputfields; impl->numeout = numoutputfields; 159 160 // Set up infield and outfield pointer arrays 161 // Infields 162 ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr, 163 impl->evecs, impl->evecsin, 164 impl->qvecsin, 0, 165 numinputfields, Q); 166 CeedChk(ierr); 167 // Outfields 168 ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr, 169 impl->evecs, impl->evecsout, 170 impl->qvecsout, numinputfields, 171 numoutputfields, Q); 172 CeedChk(ierr); 173 174 // Identity QFunctions 175 if (impl->identityqf) { 176 CeedEvalMode inmode, outmode; 177 CeedQFunctionField *infields, *outfields; 178 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 179 180 for (CeedInt i=0; i<numinputfields; i++) { 181 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 182 CeedChk(ierr); 183 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 184 CeedChk(ierr); 185 186 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 187 impl->qvecsout[i] = impl->qvecsin[i]; 188 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 189 } 190 } 191 192 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 193 194 return 0; 195 } 196 197 //------------------------------------------------------------------------------ 198 // Setup Input Fields 199 //------------------------------------------------------------------------------ 200 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields, 201 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 202 CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) { 203 CeedInt ierr; 204 CeedEvalMode emode; 205 CeedVector vec; 206 CeedTransposeMode lmode; 207 uint64_t state; 208 209 for (CeedInt i=0; i<numinputfields; i++) { 210 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 211 CeedChk(ierr); 212 if (emode == CEED_EVAL_WEIGHT) { // Skip 213 } else { 214 // Get input vector 215 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 216 if (vec != CEED_VECTOR_ACTIVE) { 217 // Restrict 218 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 219 if (state != impl->inputstate[i]) { 220 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 221 CeedChk(ierr); 222 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 223 lmode, vec, impl->evecs[i], request); 224 CeedChk(ierr); 225 impl->inputstate[i] = state; 226 } 227 } else { 228 // Set Qvec for CEED_EVAL_NONE 229 if (emode == CEED_EVAL_NONE) { 230 ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST, 231 &impl->edata[i]); CeedChk(ierr); 232 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 233 CEED_USE_POINTER, 234 impl->edata[i]); CeedChk(ierr); 235 ierr = CeedVectorRestoreArray(impl->evecsin[i], 236 &impl->edata[i]); CeedChk(ierr); 237 } 238 } 239 // Get evec 240 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 241 (const CeedScalar **) &impl->edata[i]); 242 CeedChk(ierr); 243 } 244 } 245 return 0; 246 } 247 248 //------------------------------------------------------------------------------ 249 // Input Basis Action 250 //------------------------------------------------------------------------------ 251 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 252 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 253 CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive, 254 CeedOperator_Opt *impl, CeedRequest *request) { 255 CeedInt ierr; 256 CeedInt dim, elemsize, size; 257 CeedElemRestriction Erestrict; 258 CeedEvalMode emode; 259 CeedBasis basis; 260 CeedVector vec; 261 CeedTransposeMode lmode; 262 263 for (CeedInt i=0; i<numinputfields; i++) { 264 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 265 // Skip active input 266 if (skipactive) { 267 if (vec == CEED_VECTOR_ACTIVE) 268 continue; 269 } 270 271 CeedInt activein = 0; 272 // Get elemsize, emode, size 273 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 274 CeedChk(ierr); 275 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 276 CeedChk(ierr); 277 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 278 CeedChk(ierr); 279 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 280 // Restrict block active input 281 if (vec == CEED_VECTOR_ACTIVE) { 282 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 283 CeedChk(ierr); 284 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize, 285 CEED_NOTRANSPOSE, lmode, invec, 286 impl->evecsin[i], request); 287 CeedChk(ierr); 288 activein = 1; 289 } 290 // Basis action 291 switch(emode) { 292 case CEED_EVAL_NONE: 293 if (!activein) { 294 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 295 CEED_USE_POINTER, 296 &impl->edata[i][e*Q*size]); CeedChk(ierr); 297 } 298 break; 299 case CEED_EVAL_INTERP: 300 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 301 CeedChk(ierr); 302 if (!activein) { 303 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 304 CEED_USE_POINTER, 305 &impl->edata[i][e*elemsize*size]); 306 CeedChk(ierr); 307 } 308 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 309 CEED_EVAL_INTERP, impl->evecsin[i], 310 impl->qvecsin[i]); CeedChk(ierr); 311 break; 312 case CEED_EVAL_GRAD: 313 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 314 CeedChk(ierr); 315 if (!activein) { 316 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 317 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 318 CEED_USE_POINTER, 319 &impl->edata[i][e*elemsize*size/dim]); 320 CeedChk(ierr); 321 } 322 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 323 CEED_EVAL_GRAD, impl->evecsin[i], 324 impl->qvecsin[i]); CeedChk(ierr); 325 break; 326 case CEED_EVAL_WEIGHT: 327 break; // No action 328 case CEED_EVAL_DIV: 329 case CEED_EVAL_CURL: { 330 // LCOV_EXCL_START 331 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 332 CeedChk(ierr); 333 Ceed ceed; 334 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 335 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 336 // LCOV_EXCL_STOP 337 break; // Not implemented 338 } 339 } 340 } 341 return 0; 342 } 343 344 //------------------------------------------------------------------------------ 345 // Output Basis Action 346 //------------------------------------------------------------------------------ 347 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 348 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 349 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 350 CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl, 351 CeedRequest *request) { 352 CeedInt ierr; 353 CeedElemRestriction Erestrict; 354 CeedEvalMode emode; 355 CeedBasis basis; 356 CeedVector vec; 357 CeedTransposeMode lmode; 358 359 for (CeedInt i=0; i<numoutputfields; i++) { 360 // Get elemsize, emode, size 361 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 362 CeedChk(ierr); 363 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 364 CeedChk(ierr); 365 // Basis action 366 switch(emode) { 367 case CEED_EVAL_NONE: 368 break; // No action 369 case CEED_EVAL_INTERP: 370 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 371 CeedChk(ierr); 372 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 373 CEED_EVAL_INTERP, impl->qvecsout[i], 374 impl->evecsout[i]); CeedChk(ierr); 375 break; 376 case CEED_EVAL_GRAD: 377 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 378 CeedChk(ierr); 379 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 380 CEED_EVAL_GRAD, impl->qvecsout[i], 381 impl->evecsout[i]); CeedChk(ierr); 382 break; 383 case CEED_EVAL_WEIGHT: { 384 // LCOV_EXCL_START 385 Ceed ceed; 386 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 387 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 388 "evaluation mode"); 389 // LCOV_EXCL_STOP 390 break; // Should not occur 391 } 392 case CEED_EVAL_DIV: 393 case CEED_EVAL_CURL: { 394 // LCOV_EXCL_START 395 Ceed ceed; 396 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 397 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 398 // LCOV_EXCL_STOP 399 break; // Not implemented 400 } 401 } 402 // Restrict output block 403 // Get output vector 404 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 405 if (vec == CEED_VECTOR_ACTIVE) 406 vec = outvec; 407 // Restrict 408 ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); 409 CeedChk(ierr); 410 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein], 411 e/blksize, CEED_TRANSPOSE, 412 lmode, impl->evecsout[i], 413 vec, request); CeedChk(ierr); 414 } 415 return 0; 416 } 417 418 //------------------------------------------------------------------------------ 419 // Restore Input Vectors 420 //------------------------------------------------------------------------------ 421 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields, 422 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 423 bool skipactive, CeedOperator_Opt *impl) { 424 CeedInt ierr; 425 CeedEvalMode emode; 426 427 for (CeedInt i=0; i<numinputfields; i++) { 428 // Skip active inputs 429 if (skipactive) { 430 CeedVector vec; 431 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 432 if (vec == CEED_VECTOR_ACTIVE) 433 continue; 434 } 435 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 436 CeedChk(ierr); 437 if (emode == CEED_EVAL_WEIGHT) { // Skip 438 } else { 439 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 440 (const CeedScalar **) &impl->edata[i]); 441 CeedChk(ierr); 442 } 443 } 444 return 0; 445 } 446 447 //------------------------------------------------------------------------------ 448 // Operator Apply 449 //------------------------------------------------------------------------------ 450 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec, 451 CeedVector outvec, CeedRequest *request) { 452 int ierr; 453 Ceed ceed; 454 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 455 Ceed_Opt *ceedimpl; 456 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 457 CeedInt blksize = ceedimpl->blksize; 458 CeedOperator_Opt *impl; 459 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 460 CeedInt Q, numinputfields, numoutputfields, numelements; 461 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 462 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 463 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 464 CeedQFunction qf; 465 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 466 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 467 CeedChk(ierr); 468 CeedOperatorField *opinputfields, *opoutputfields; 469 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 470 CeedChk(ierr); 471 CeedQFunctionField *qfinputfields, *qfoutputfields; 472 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 473 CeedChk(ierr); 474 CeedEvalMode emode; 475 476 // Setup 477 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 478 479 // Input Evecs and Restriction 480 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 481 opinputfields, invec, impl, request); 482 CeedChk(ierr); 483 484 // Output Lvecs, Evecs, and Qvecs 485 for (CeedInt i=0; i<numoutputfields; i++) { 486 // Set Qvec if needed 487 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 488 CeedChk(ierr); 489 if (emode == CEED_EVAL_NONE) { 490 // Set qvec to single block evec 491 ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST, 492 &impl->edata[i + numinputfields]); 493 CeedChk(ierr); 494 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 495 CEED_USE_POINTER, 496 impl->edata[i + numinputfields]); CeedChk(ierr); 497 ierr = CeedVectorRestoreArray(impl->evecsout[i], 498 &impl->edata[i + numinputfields]); 499 CeedChk(ierr); 500 } 501 } 502 503 // Loop through elements 504 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 505 // Input basis apply 506 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 507 numinputfields, blksize, invec, false, 508 impl, request); CeedChk(ierr); 509 510 // Q function 511 if (!impl->identityqf) { 512 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 513 CeedChk(ierr); 514 } 515 516 // Output basis apply and restrict 517 ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields, 518 blksize, numinputfields, numoutputfields, 519 op, outvec, impl, request); 520 CeedChk(ierr); 521 } 522 523 // Restore input arrays 524 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 525 opinputfields, false, impl); 526 CeedChk(ierr); 527 528 return 0; 529 } 530 531 //------------------------------------------------------------------------------ 532 // Assemble Linear QFunction 533 //------------------------------------------------------------------------------ 534 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op, 535 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 536 int ierr; 537 Ceed ceed; 538 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 539 Ceed_Opt *ceedimpl; 540 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 541 const CeedInt blksize = ceedimpl->blksize; 542 CeedOperator_Opt *impl; 543 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 544 CeedInt Q, numinputfields, numoutputfields, numelements, size; 545 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 546 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 547 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 548 CeedQFunction qf; 549 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 550 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 551 CeedChk(ierr); 552 CeedOperatorField *opinputfields, *opoutputfields; 553 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 554 CeedChk(ierr); 555 CeedQFunctionField *qfinputfields, *qfoutputfields; 556 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 557 CeedChk(ierr); 558 CeedVector vec, lvec; 559 CeedInt numactivein = 0, numactiveout = 0; 560 CeedVector *activein = NULL; 561 CeedScalar *a, *tmp; 562 563 // Setup 564 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 565 566 // Check for identity 567 if (impl->identityqf) 568 // LCOV_EXCL_START 569 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 570 // LCOV_EXCL_STOP 571 572 // Input Evecs and Restriction 573 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 574 opinputfields, NULL, impl, request); 575 CeedChk(ierr); 576 577 // Count number of active input fields 578 for (CeedInt i=0; i<numinputfields; i++) { 579 // Get input vector 580 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 581 // Check if active input 582 if (vec == CEED_VECTOR_ACTIVE) { 583 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 584 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 585 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 586 CeedChk(ierr); 587 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 588 for (CeedInt field=0; field<size; field++) { 589 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 590 CeedChk(ierr); 591 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 592 CEED_USE_POINTER, &tmp[field*Q*blksize]); 593 CeedChk(ierr); 594 } 595 numactivein += size; 596 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 597 } 598 } 599 600 // Count number of active output fields 601 for (CeedInt i=0; i<numoutputfields; i++) { 602 // Get output vector 603 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 604 // Check if active output 605 if (vec == CEED_VECTOR_ACTIVE) { 606 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 607 numactiveout += size; 608 } 609 } 610 611 // Check sizes 612 if (!numactivein || !numactiveout) 613 // LCOV_EXCL_START 614 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 615 "and outputs"); 616 // LCOV_EXCL_STOP 617 618 // Setup lvec 619 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 620 &lvec); CeedChk(ierr); 621 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 622 623 // Create output restriction 624 ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q, 625 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 626 // Create assembled vector 627 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 628 assembled); CeedChk(ierr); 629 630 // Loop through elements 631 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 632 // Input basis apply 633 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 634 numinputfields, blksize, NULL, true, 635 impl, request); CeedChk(ierr); 636 637 // Assemble QFunction 638 for (CeedInt in=0; in<numactivein; in++) { 639 // Set Inputs 640 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 641 if (numactivein > 1) { 642 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 643 0.0); CeedChk(ierr); 644 } 645 // Set Outputs 646 for (CeedInt out=0; out<numoutputfields; out++) { 647 // Get output vector 648 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 649 CeedChk(ierr); 650 // Check if active output 651 if (vec == CEED_VECTOR_ACTIVE) { 652 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 653 CEED_USE_POINTER, a); CeedChk(ierr); 654 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 655 CeedChk(ierr); 656 a += size*Q*blksize; // Advance the pointer by the size of the output 657 } 658 } 659 // Apply QFunction 660 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 661 CeedChk(ierr); 662 } 663 } 664 665 // Un-set output Qvecs to prevent accidental overwrite of Assembled 666 for (CeedInt out=0; out<numoutputfields; out++) { 667 // Get output vector 668 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 669 CeedChk(ierr); 670 // Check if active output 671 if (vec == CEED_VECTOR_ACTIVE) { 672 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 673 NULL); CeedChk(ierr); 674 } 675 } 676 677 // Restore input arrays 678 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 679 opinputfields, true, impl); 680 CeedChk(ierr); 681 682 // Output blocked restriction 683 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 684 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 685 CeedElemRestriction blkrstr; 686 ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize, 687 numelements*Q, 688 numactivein*numactiveout, 689 CEED_MEM_HOST, CEED_COPY_VALUES, 690 NULL, &blkrstr); CeedChk(ierr); 691 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE, 692 lvec, *assembled, request); CeedChk(ierr); 693 694 // Cleanup 695 for (CeedInt i=0; i<numactivein; i++) { 696 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 697 } 698 ierr = CeedFree(&activein); CeedChk(ierr); 699 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 700 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 701 702 return 0; 703 } 704 705 //------------------------------------------------------------------------------ 706 // Operator Destroy 707 //------------------------------------------------------------------------------ 708 static int CeedOperatorDestroy_Opt(CeedOperator op) { 709 int ierr; 710 CeedOperator_Opt *impl; 711 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 712 713 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 714 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 715 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 716 } 717 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 718 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 719 ierr = CeedFree(&impl->edata); CeedChk(ierr); 720 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 721 722 for (CeedInt i=0; i<impl->numein; i++) { 723 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 724 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 725 } 726 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 727 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 728 729 for (CeedInt i=0; i<impl->numeout; i++) { 730 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 731 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 732 } 733 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 734 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 735 736 ierr = CeedFree(&impl); CeedChk(ierr); 737 return 0; 738 } 739 740 //------------------------------------------------------------------------------ 741 // Operator Create 742 //------------------------------------------------------------------------------ 743 int CeedOperatorCreate_Opt(CeedOperator op) { 744 int ierr; 745 Ceed ceed; 746 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 747 Ceed_Opt *ceedimpl; 748 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 749 CeedInt blksize = ceedimpl->blksize; 750 CeedOperator_Opt *impl; 751 752 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 753 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 754 755 if (blksize != 1 && blksize != 8) 756 // LCOV_EXCL_START 757 return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize); 758 // LCOV_EXCL_STOP 759 760 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 761 CeedOperatorAssembleLinearQFunction_Opt); 762 CeedChk(ierr); 763 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 764 CeedOperatorApply_Opt); CeedChk(ierr); 765 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 766 CeedOperatorDestroy_Opt); CeedChk(ierr); 767 return 0; 768 } 769 //------------------------------------------------------------------------------ 770