1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <string.h> 18 #include "ceed-opt.h" 19 #include "../ref/ceed-ref.h" 20 21 static int CeedOperatorDestroy_Opt(CeedOperator op) { 22 int ierr; 23 CeedOperator_Opt *impl; 24 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 25 26 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 27 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 28 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 29 } 30 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 31 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 32 ierr = CeedFree(&impl->edata); CeedChk(ierr); 33 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 34 35 for (CeedInt i=0; i<impl->numein; i++) { 36 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 37 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 38 } 39 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 40 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 41 42 for (CeedInt i=0; i<impl->numeout; i++) { 43 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 44 if (!impl->identityqf) { 45 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 46 } 47 } 48 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 49 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 50 51 ierr = CeedFree(&impl); CeedChk(ierr); 52 return 0; 53 } 54 55 /* 56 Setup infields or outfields 57 */ 58 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 59 bool inOrOut, const CeedInt blksize, 60 CeedElemRestriction *blkrestr, 61 CeedVector *fullevecs, CeedVector *evecs, 62 CeedVector *qvecs, CeedInt starte, 63 CeedInt numfields, CeedInt Q) { 64 CeedInt dim, ierr, ncomp, size, P; 65 Ceed ceed; 66 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 67 CeedBasis basis; 68 CeedElemRestriction r; 69 CeedOperatorField *opfields; 70 CeedQFunctionField *qffields; 71 if (inOrOut) { 72 ierr = CeedOperatorGetFields(op, NULL, &opfields); 73 CeedChk(ierr); 74 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 75 CeedChk(ierr); 76 } else { 77 ierr = CeedOperatorGetFields(op, &opfields, NULL); 78 CeedChk(ierr); 79 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 80 CeedChk(ierr); 81 } 82 83 // Loop over fields 84 for (CeedInt i=0; i<numfields; i++) { 85 CeedEvalMode emode; 86 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 87 88 if (emode != CEED_EVAL_WEIGHT) { 89 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 90 CeedChk(ierr); 91 CeedElemRestriction_Ref *data; 92 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 93 Ceed ceed; 94 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 95 CeedInt nelem, elemsize, nnodes; 96 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 97 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 98 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 99 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 100 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 101 blksize, nnodes, ncomp, 102 CEED_MEM_HOST, CEED_COPY_VALUES, 103 data->indices, &blkrestr[i+starte]); 104 CeedChk(ierr); 105 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 106 &fullevecs[i+starte]); 107 CeedChk(ierr); 108 } 109 110 switch(emode) { 111 case CEED_EVAL_NONE: 112 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 113 ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr); 114 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 115 break; 116 case CEED_EVAL_INTERP: 117 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 118 ierr = CeedElemRestrictionGetElementSize(r, &P); 119 CeedChk(ierr); 120 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 121 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 122 break; 123 case CEED_EVAL_GRAD: 124 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 125 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 126 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 127 ierr = CeedElemRestrictionGetElementSize(r, &P); 128 CeedChk(ierr); 129 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 130 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 131 break; 132 case CEED_EVAL_WEIGHT: // Only on input fields 133 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 134 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 135 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 136 CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr); 137 138 break; 139 case CEED_EVAL_DIV: 140 break; // Not implimented 141 case CEED_EVAL_CURL: 142 break; // Not implimented 143 } 144 } 145 return 0; 146 } 147 148 /* 149 CeedOperator needs to connect all the named fields (be they active or passive) 150 to the named inputs and outputs of its CeedQFunction. 151 */ 152 static int CeedOperatorSetup_Opt(CeedOperator op) { 153 int ierr; 154 bool setupdone; 155 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 156 if (setupdone) return 0; 157 Ceed ceed; 158 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 159 Ceed_Opt *ceedimpl; 160 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 161 const CeedInt blksize = ceedimpl->blksize; 162 CeedOperator_Opt *impl; 163 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 164 CeedQFunction qf; 165 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 166 CeedInt Q, numinputfields, numoutputfields; 167 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 168 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 169 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 170 CeedChk(ierr); 171 CeedOperatorField *opinputfields, *opoutputfields; 172 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 173 CeedChk(ierr); 174 CeedQFunctionField *qfinputfields, *qfoutputfields; 175 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 176 CeedChk(ierr); 177 178 // Allocate 179 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 180 CeedChk(ierr); 181 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 182 CeedChk(ierr); 183 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 184 CeedChk(ierr); 185 186 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 187 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 188 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 189 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 190 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 191 192 impl->numein = numinputfields; impl->numeout = numoutputfields; 193 194 // Set up infield and outfield pointer arrays 195 // Infields 196 ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr, 197 impl->evecs, impl->evecsin, 198 impl->qvecsin, 0, 199 numinputfields, Q); 200 CeedChk(ierr); 201 // Outfields 202 ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr, 203 impl->evecs, impl->evecsout, 204 impl->qvecsout, numinputfields, 205 numoutputfields, Q); 206 CeedChk(ierr); 207 208 // Identity QFunctions 209 if (impl->identityqf) { 210 CeedEvalMode inmode, outmode; 211 CeedQFunctionField *infields, *outfields; 212 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 213 214 for (CeedInt i=0; i<numinputfields; i++) { 215 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 216 CeedChk(ierr); 217 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 218 CeedChk(ierr); 219 220 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 221 impl->qvecsout[i] = impl->qvecsin[i]; 222 } 223 } 224 225 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 226 227 return 0; 228 } 229 230 // Setup Input fields 231 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields, 232 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 233 CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) { 234 CeedInt ierr; 235 CeedEvalMode emode; 236 CeedVector vec; 237 CeedTransposeMode lmode; 238 uint64_t state; 239 240 for (CeedInt i=0; i<numinputfields; i++) { 241 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 242 CeedChk(ierr); 243 if (emode == CEED_EVAL_WEIGHT) { // Skip 244 } else { 245 // Get input vector 246 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 247 if (vec != CEED_VECTOR_ACTIVE) { 248 // Restrict 249 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 250 if (state != impl->inputstate[i]) { 251 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 252 CeedChk(ierr); 253 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 254 lmode, vec, impl->evecs[i], request); 255 CeedChk(ierr); 256 impl->inputstate[i] = state; 257 } 258 } else { 259 // Set Qvec for CEED_EVAL_NONE 260 if (emode == CEED_EVAL_NONE) { 261 ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST, 262 &impl->edata[i]); CeedChk(ierr); 263 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 264 CEED_USE_POINTER, 265 impl->edata[i]); CeedChk(ierr); 266 ierr = CeedVectorRestoreArray(impl->evecsin[i], 267 &impl->edata[i]); CeedChk(ierr); 268 } 269 } 270 // Get evec 271 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 272 (const CeedScalar **) &impl->edata[i]); 273 CeedChk(ierr); 274 } 275 } 276 return 0; 277 } 278 279 // Input basis action 280 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 281 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 282 CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive, 283 CeedOperator_Opt *impl, CeedRequest *request) { 284 CeedInt ierr; 285 CeedInt dim, elemsize, size; 286 CeedElemRestriction Erestrict; 287 CeedEvalMode emode; 288 CeedBasis basis; 289 CeedVector vec; 290 CeedTransposeMode lmode; 291 292 for (CeedInt i=0; i<numinputfields; i++) { 293 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 294 // Skip active input 295 if (skipactive) { 296 if (vec == CEED_VECTOR_ACTIVE) 297 continue; 298 } 299 300 CeedInt activein = 0; 301 // Get elemsize, emode, size 302 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 303 CeedChk(ierr); 304 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 305 CeedChk(ierr); 306 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 307 CeedChk(ierr); 308 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 309 // Restrict block active input 310 if (vec == CEED_VECTOR_ACTIVE) { 311 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 312 CeedChk(ierr); 313 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize, 314 CEED_NOTRANSPOSE, lmode, invec, 315 impl->evecsin[i], request); 316 CeedChk(ierr); 317 activein = 1; 318 } 319 // Basis action 320 switch(emode) { 321 case CEED_EVAL_NONE: 322 if (!activein) { 323 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 324 CEED_USE_POINTER, 325 &impl->edata[i][e*Q*size]); CeedChk(ierr); 326 } 327 break; 328 case CEED_EVAL_INTERP: 329 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 330 CeedChk(ierr); 331 if (!activein) { 332 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 333 CEED_USE_POINTER, 334 &impl->edata[i][e*elemsize*size]); 335 CeedChk(ierr); 336 } 337 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 338 CEED_EVAL_INTERP, impl->evecsin[i], 339 impl->qvecsin[i]); CeedChk(ierr); 340 break; 341 case CEED_EVAL_GRAD: 342 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 343 CeedChk(ierr); 344 if (!activein) { 345 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 346 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 347 CEED_USE_POINTER, 348 &impl->edata[i][e*elemsize*size/dim]); 349 CeedChk(ierr); 350 } 351 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 352 CEED_EVAL_GRAD, impl->evecsin[i], 353 impl->qvecsin[i]); CeedChk(ierr); 354 break; 355 case CEED_EVAL_WEIGHT: 356 break; // No action 357 case CEED_EVAL_DIV: 358 case CEED_EVAL_CURL: { 359 // LCOV_EXCL_START 360 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 361 CeedChk(ierr); 362 Ceed ceed; 363 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 364 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 365 // LCOV_EXCL_STOP 366 break; // Not implemented 367 } 368 } 369 } 370 return 0; 371 } 372 373 // Output basis action 374 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 375 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 376 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 377 CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl, 378 CeedRequest *request) { 379 CeedInt ierr; 380 CeedElemRestriction Erestrict; 381 CeedEvalMode emode; 382 CeedBasis basis; 383 CeedVector vec; 384 CeedTransposeMode lmode; 385 386 for (CeedInt i=0; i<numoutputfields; i++) { 387 // Get elemsize, emode, size 388 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 389 CeedChk(ierr); 390 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 391 CeedChk(ierr); 392 // Basis action 393 switch(emode) { 394 case CEED_EVAL_NONE: 395 break; // No action 396 case CEED_EVAL_INTERP: 397 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 398 CeedChk(ierr); 399 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 400 CEED_EVAL_INTERP, impl->qvecsout[i], 401 impl->evecsout[i]); CeedChk(ierr); 402 break; 403 case CEED_EVAL_GRAD: 404 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 405 CeedChk(ierr); 406 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 407 CEED_EVAL_GRAD, impl->qvecsout[i], 408 impl->evecsout[i]); CeedChk(ierr); 409 break; 410 case CEED_EVAL_WEIGHT: { 411 // LCOV_EXCL_START 412 Ceed ceed; 413 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 414 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 415 "evaluation mode"); 416 // LCOV_EXCL_STOP 417 break; // Should not occur 418 } 419 case CEED_EVAL_DIV: 420 case CEED_EVAL_CURL: { 421 // LCOV_EXCL_START 422 Ceed ceed; 423 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 424 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 425 // LCOV_EXCL_STOP 426 break; // Not implemented 427 } 428 } 429 // Restrict output block 430 // Get output vector 431 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 432 if (vec == CEED_VECTOR_ACTIVE) 433 vec = outvec; 434 // Restrict 435 ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); 436 CeedChk(ierr); 437 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein], 438 e/blksize, CEED_TRANSPOSE, 439 lmode, impl->evecsout[i], 440 vec, request); CeedChk(ierr); 441 } 442 return 0; 443 } 444 445 // Restore Inputs 446 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields, 447 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 448 bool skipactive, CeedOperator_Opt *impl) { 449 CeedInt ierr; 450 CeedEvalMode emode; 451 452 for (CeedInt i=0; i<numinputfields; i++) { 453 // Skip active inputs 454 if (skipactive) { 455 CeedVector vec; 456 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 457 if (vec == CEED_VECTOR_ACTIVE) 458 continue; 459 } 460 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 461 CeedChk(ierr); 462 if (emode == CEED_EVAL_WEIGHT) { // Skip 463 } else { 464 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 465 (const CeedScalar **) &impl->edata[i]); 466 CeedChk(ierr); 467 } 468 } 469 return 0; 470 } 471 472 // Apply Ceed Operator 473 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec, 474 CeedVector outvec, CeedRequest *request) { 475 int ierr; 476 Ceed ceed; 477 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 478 Ceed_Opt *ceedimpl; 479 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 480 CeedInt blksize = ceedimpl->blksize; 481 CeedOperator_Opt *impl; 482 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 483 CeedInt Q, numinputfields, numoutputfields, numelements; 484 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 485 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 486 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 487 CeedQFunction qf; 488 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 489 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 490 CeedChk(ierr); 491 CeedOperatorField *opinputfields, *opoutputfields; 492 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 493 CeedChk(ierr); 494 CeedQFunctionField *qfinputfields, *qfoutputfields; 495 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 496 CeedChk(ierr); 497 CeedEvalMode emode; 498 CeedVector vec; 499 500 // Setup 501 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 502 503 // Input Evecs and Restriction 504 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 505 opinputfields, invec, impl, request); 506 CeedChk(ierr); 507 508 // Output Lvecs, Evecs, and Qvecs 509 for (CeedInt i=0; i<numoutputfields; i++) { 510 // Zero Lvecs 511 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 512 if (vec == CEED_VECTOR_ACTIVE) { 513 if (!impl->add) { 514 vec = outvec; 515 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 516 } 517 } else { 518 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 519 } 520 // Set Qvec if needed 521 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 522 CeedChk(ierr); 523 if (emode == CEED_EVAL_NONE) { 524 // Set qvec to single block evec 525 ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST, 526 &impl->edata[i + numinputfields]); 527 CeedChk(ierr); 528 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 529 CEED_USE_POINTER, 530 impl->edata[i + numinputfields]); CeedChk(ierr); 531 ierr = CeedVectorRestoreArray(impl->evecsout[i], 532 &impl->edata[i + numinputfields]); 533 CeedChk(ierr); 534 } 535 } 536 impl->add = false; 537 538 // Loop through elements 539 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 540 // Input basis apply 541 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 542 numinputfields, blksize, invec, false, 543 impl, request); CeedChk(ierr); 544 545 // Q function 546 if (!impl->identityqf) { 547 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 548 CeedChk(ierr); 549 } 550 551 // Output basis apply and restrict 552 ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields, 553 blksize, numinputfields, numoutputfields, 554 op, outvec, impl, request); 555 CeedChk(ierr); 556 } 557 558 // Restore input arrays 559 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 560 opinputfields, false, impl); 561 CeedChk(ierr); 562 563 return 0; 564 } 565 566 // Assemble Linear QFunction 567 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op, 568 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 569 int ierr; 570 Ceed ceed; 571 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 572 Ceed_Opt *ceedimpl; 573 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 574 const CeedInt blksize = ceedimpl->blksize; 575 CeedOperator_Opt *impl; 576 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 577 CeedInt Q, numinputfields, numoutputfields, numelements, size; 578 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 579 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 580 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 581 CeedQFunction qf; 582 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 583 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 584 CeedChk(ierr); 585 CeedOperatorField *opinputfields, *opoutputfields; 586 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 587 CeedChk(ierr); 588 CeedQFunctionField *qfinputfields, *qfoutputfields; 589 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 590 CeedChk(ierr); 591 CeedVector vec, lvec; 592 CeedInt numactivein = 0, numactiveout = 0; 593 CeedVector *activein = NULL; 594 CeedScalar *a, *tmp; 595 596 // Setup 597 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 598 599 // Check for identity 600 if (impl->identityqf) 601 // LCOV_EXCL_START 602 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 603 // LCOV_EXCL_STOP 604 605 // Input Evecs and Restriction 606 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 607 opinputfields, NULL, impl, request); 608 CeedChk(ierr); 609 610 // Count number of active input fields 611 for (CeedInt i=0; i<numinputfields; i++) { 612 // Get input vector 613 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 614 // Check if active input 615 if (vec == CEED_VECTOR_ACTIVE) { 616 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 617 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 618 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 619 CeedChk(ierr); 620 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 621 for (CeedInt field=0; field<size; field++) { 622 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 623 CeedChk(ierr); 624 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 625 CEED_USE_POINTER, &tmp[field*Q*blksize]); 626 CeedChk(ierr); 627 } 628 numactivein += size; 629 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 630 } 631 } 632 633 // Count number of active output fields 634 for (CeedInt i=0; i<numoutputfields; i++) { 635 // Get output vector 636 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 637 // Check if active output 638 if (vec == CEED_VECTOR_ACTIVE) { 639 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 640 numactiveout += size; 641 } 642 } 643 644 // Check sizes 645 if (!numactivein || !numactiveout) 646 // LCOV_EXCL_START 647 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 648 "and outputs"); 649 // LCOV_EXCL_STOP 650 651 // Setup lvec 652 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 653 &lvec); CeedChk(ierr); 654 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 655 656 // Create output restriction 657 ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q, 658 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 659 // Create assembled vector 660 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 661 assembled); CeedChk(ierr); 662 663 // Loop through elements 664 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 665 // Input basis apply 666 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 667 numinputfields, blksize, NULL, true, 668 impl, request); CeedChk(ierr); 669 670 // Assemble QFunction 671 for (CeedInt in=0; in<numactivein; in++) { 672 // Set Inputs 673 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 674 if (numactivein > 1) { 675 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 676 0.0); CeedChk(ierr); 677 } 678 // Set Outputs 679 for (CeedInt out=0; out<numoutputfields; out++) { 680 // Get output vector 681 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 682 CeedChk(ierr); 683 // Check if active output 684 if (vec == CEED_VECTOR_ACTIVE) { 685 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 686 CEED_USE_POINTER, a); CeedChk(ierr); 687 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 688 CeedChk(ierr); 689 a += size*Q*blksize; // Advance the pointer by the size of the output 690 } 691 } 692 // Apply QFunction 693 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 694 CeedChk(ierr); 695 } 696 } 697 698 // Un-set output Qvecs to prevent accidental overwrite of Assembled 699 for (CeedInt out=0; out<numoutputfields; out++) { 700 // Get output vector 701 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 702 CeedChk(ierr); 703 // Check if active output 704 if (vec == CEED_VECTOR_ACTIVE) { 705 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 706 NULL); CeedChk(ierr); 707 } 708 } 709 710 // Restore input arrays 711 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 712 opinputfields, true, impl); 713 CeedChk(ierr); 714 715 // Output blocked restriction 716 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 717 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 718 CeedElemRestriction blkrstr; 719 ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize, 720 numelements*Q, 721 numactivein*numactiveout, 722 CEED_MEM_HOST, CEED_COPY_VALUES, 723 NULL, &blkrstr); CeedChk(ierr); 724 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE, 725 lvec, *assembled, request); CeedChk(ierr); 726 727 // Cleanup 728 for (CeedInt i=0; i<numactivein; i++) { 729 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 730 } 731 ierr = CeedFree(&activein); CeedChk(ierr); 732 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 733 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 734 735 return 0; 736 } 737 738 int CeedOperatorCreate_Opt(CeedOperator op) { 739 int ierr; 740 Ceed ceed; 741 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 742 Ceed_Opt *ceedimpl; 743 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 744 CeedInt blksize = ceedimpl->blksize; 745 CeedOperator_Opt *impl; 746 747 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 748 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 749 750 if (blksize == 1 || blksize == 8) { 751 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 752 CeedOperatorAssembleLinearQFunction_Opt); 753 CeedChk(ierr); 754 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 755 CeedOperatorApply_Opt); CeedChk(ierr); 756 } else { 757 // LCOV_EXCL_START 758 return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize); 759 // LCOV_EXCL_STOP 760 } 761 762 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 763 CeedOperatorDestroy_Opt); CeedChk(ierr); 764 return 0; 765 } 766