1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <string.h> 18 #include "ceed-opt.h" 19 #include "../ref/ceed-ref.h" 20 21 static int CeedOperatorDestroy_Opt(CeedOperator op) { 22 int ierr; 23 CeedOperator_Opt *impl; 24 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 25 26 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 27 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 28 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 29 } 30 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 31 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 32 ierr = CeedFree(&impl->edata); CeedChk(ierr); 33 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 34 35 for (CeedInt i=0; i<impl->numein; i++) { 36 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 37 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 38 } 39 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 40 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 41 42 for (CeedInt i=0; i<impl->numeout; i++) { 43 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 44 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 45 } 46 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 47 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 48 49 ierr = CeedFree(&impl); CeedChk(ierr); 50 return 0; 51 } 52 53 /* 54 Setup infields or outfields 55 */ 56 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 57 bool inOrOut, const CeedInt blksize, 58 CeedElemRestriction *blkrestr, 59 CeedVector *fullevecs, CeedVector *evecs, 60 CeedVector *qvecs, CeedInt starte, 61 CeedInt numfields, CeedInt Q) { 62 CeedInt dim, ierr, ncomp, size, P; 63 Ceed ceed; 64 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 65 CeedBasis basis; 66 CeedElemRestriction r; 67 CeedOperatorField *opfields; 68 CeedQFunctionField *qffields; 69 if (inOrOut) { 70 ierr = CeedOperatorGetFields(op, NULL, &opfields); 71 CeedChk(ierr); 72 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 73 CeedChk(ierr); 74 } else { 75 ierr = CeedOperatorGetFields(op, &opfields, NULL); 76 CeedChk(ierr); 77 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 78 CeedChk(ierr); 79 } 80 81 // Loop over fields 82 for (CeedInt i=0; i<numfields; i++) { 83 CeedEvalMode emode; 84 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 85 86 if (emode != CEED_EVAL_WEIGHT) { 87 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 88 CeedChk(ierr); 89 CeedElemRestriction_Ref *data; 90 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 91 Ceed ceed; 92 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 93 CeedInt nelem, elemsize, nnodes; 94 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 95 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 96 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 97 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 98 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 99 blksize, nnodes, ncomp, 100 CEED_MEM_HOST, CEED_COPY_VALUES, 101 data->indices, &blkrestr[i+starte]); 102 CeedChk(ierr); 103 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 104 &fullevecs[i+starte]); 105 CeedChk(ierr); 106 } 107 108 switch(emode) { 109 case CEED_EVAL_NONE: 110 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 111 ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr); 112 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 113 break; 114 case CEED_EVAL_INTERP: 115 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 116 ierr = CeedElemRestrictionGetElementSize(r, &P); 117 CeedChk(ierr); 118 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 119 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 120 break; 121 case CEED_EVAL_GRAD: 122 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 123 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 124 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 125 ierr = CeedElemRestrictionGetElementSize(r, &P); 126 CeedChk(ierr); 127 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 128 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 129 break; 130 case CEED_EVAL_WEIGHT: // Only on input fields 131 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 132 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 133 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 134 CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr); 135 136 break; 137 case CEED_EVAL_DIV: 138 break; // Not implimented 139 case CEED_EVAL_CURL: 140 break; // Not implimented 141 } 142 } 143 return 0; 144 } 145 146 /* 147 CeedOperator needs to connect all the named fields (be they active or passive) 148 to the named inputs and outputs of its CeedQFunction. 149 */ 150 static int CeedOperatorSetup_Opt(CeedOperator op) { 151 int ierr; 152 bool setupdone; 153 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 154 if (setupdone) return 0; 155 Ceed ceed; 156 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 157 Ceed_Opt *ceedimpl; 158 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 159 const CeedInt blksize = ceedimpl->blksize; 160 CeedOperator_Opt *impl; 161 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 162 CeedQFunction qf; 163 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 164 CeedInt Q, numinputfields, numoutputfields; 165 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 166 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 167 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 168 CeedChk(ierr); 169 CeedOperatorField *opinputfields, *opoutputfields; 170 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 171 CeedChk(ierr); 172 CeedQFunctionField *qfinputfields, *qfoutputfields; 173 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 174 CeedChk(ierr); 175 176 // Allocate 177 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 178 CeedChk(ierr); 179 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 180 CeedChk(ierr); 181 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 182 CeedChk(ierr); 183 184 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 185 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 186 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 187 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 188 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 189 190 impl->numein = numinputfields; impl->numeout = numoutputfields; 191 192 // Set up infield and outfield pointer arrays 193 // Infields 194 ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr, 195 impl->evecs, impl->evecsin, 196 impl->qvecsin, 0, 197 numinputfields, Q); 198 CeedChk(ierr); 199 // Outfields 200 ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr, 201 impl->evecs, impl->evecsout, 202 impl->qvecsout, numinputfields, 203 numoutputfields, Q); 204 CeedChk(ierr); 205 206 // Identity QFunctions 207 if (impl->identityqf) { 208 CeedEvalMode inmode, outmode; 209 CeedQFunctionField *infields, *outfields; 210 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 211 212 for (CeedInt i=0; i<numinputfields; i++) { 213 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 214 CeedChk(ierr); 215 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 216 CeedChk(ierr); 217 218 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 219 impl->qvecsout[i] = impl->qvecsin[i]; 220 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 221 } 222 } 223 224 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 225 226 return 0; 227 } 228 229 // Setup Input fields 230 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields, 231 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 232 CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) { 233 CeedInt ierr; 234 CeedEvalMode emode; 235 CeedVector vec; 236 CeedTransposeMode lmode; 237 uint64_t state; 238 239 for (CeedInt i=0; i<numinputfields; i++) { 240 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 241 CeedChk(ierr); 242 if (emode == CEED_EVAL_WEIGHT) { // Skip 243 } else { 244 // Get input vector 245 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 246 if (vec != CEED_VECTOR_ACTIVE) { 247 // Restrict 248 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 249 if (state != impl->inputstate[i]) { 250 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 251 CeedChk(ierr); 252 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 253 lmode, vec, impl->evecs[i], request); 254 CeedChk(ierr); 255 impl->inputstate[i] = state; 256 } 257 } else { 258 // Set Qvec for CEED_EVAL_NONE 259 if (emode == CEED_EVAL_NONE) { 260 ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST, 261 &impl->edata[i]); CeedChk(ierr); 262 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 263 CEED_USE_POINTER, 264 impl->edata[i]); CeedChk(ierr); 265 ierr = CeedVectorRestoreArray(impl->evecsin[i], 266 &impl->edata[i]); CeedChk(ierr); 267 } 268 } 269 // Get evec 270 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 271 (const CeedScalar **) &impl->edata[i]); 272 CeedChk(ierr); 273 } 274 } 275 return 0; 276 } 277 278 // Input basis action 279 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 280 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 281 CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive, 282 CeedOperator_Opt *impl, CeedRequest *request) { 283 CeedInt ierr; 284 CeedInt dim, elemsize, size; 285 CeedElemRestriction Erestrict; 286 CeedEvalMode emode; 287 CeedBasis basis; 288 CeedVector vec; 289 CeedTransposeMode lmode; 290 291 for (CeedInt i=0; i<numinputfields; i++) { 292 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 293 // Skip active input 294 if (skipactive) { 295 if (vec == CEED_VECTOR_ACTIVE) 296 continue; 297 } 298 299 CeedInt activein = 0; 300 // Get elemsize, emode, size 301 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 302 CeedChk(ierr); 303 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 304 CeedChk(ierr); 305 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 306 CeedChk(ierr); 307 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 308 // Restrict block active input 309 if (vec == CEED_VECTOR_ACTIVE) { 310 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 311 CeedChk(ierr); 312 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize, 313 CEED_NOTRANSPOSE, lmode, invec, 314 impl->evecsin[i], request); 315 CeedChk(ierr); 316 activein = 1; 317 } 318 // Basis action 319 switch(emode) { 320 case CEED_EVAL_NONE: 321 if (!activein) { 322 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 323 CEED_USE_POINTER, 324 &impl->edata[i][e*Q*size]); CeedChk(ierr); 325 } 326 break; 327 case CEED_EVAL_INTERP: 328 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 329 CeedChk(ierr); 330 if (!activein) { 331 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 332 CEED_USE_POINTER, 333 &impl->edata[i][e*elemsize*size]); 334 CeedChk(ierr); 335 } 336 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 337 CEED_EVAL_INTERP, impl->evecsin[i], 338 impl->qvecsin[i]); CeedChk(ierr); 339 break; 340 case CEED_EVAL_GRAD: 341 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 342 CeedChk(ierr); 343 if (!activein) { 344 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 345 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 346 CEED_USE_POINTER, 347 &impl->edata[i][e*elemsize*size/dim]); 348 CeedChk(ierr); 349 } 350 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 351 CEED_EVAL_GRAD, impl->evecsin[i], 352 impl->qvecsin[i]); CeedChk(ierr); 353 break; 354 case CEED_EVAL_WEIGHT: 355 break; // No action 356 case CEED_EVAL_DIV: 357 case CEED_EVAL_CURL: { 358 // LCOV_EXCL_START 359 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 360 CeedChk(ierr); 361 Ceed ceed; 362 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 363 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 364 // LCOV_EXCL_STOP 365 break; // Not implemented 366 } 367 } 368 } 369 return 0; 370 } 371 372 // Output basis action 373 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 374 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 375 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 376 CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl, 377 CeedRequest *request) { 378 CeedInt ierr; 379 CeedElemRestriction Erestrict; 380 CeedEvalMode emode; 381 CeedBasis basis; 382 CeedVector vec; 383 CeedTransposeMode lmode; 384 385 for (CeedInt i=0; i<numoutputfields; i++) { 386 // Get elemsize, emode, size 387 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 388 CeedChk(ierr); 389 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 390 CeedChk(ierr); 391 // Basis action 392 switch(emode) { 393 case CEED_EVAL_NONE: 394 break; // No action 395 case CEED_EVAL_INTERP: 396 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 397 CeedChk(ierr); 398 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 399 CEED_EVAL_INTERP, impl->qvecsout[i], 400 impl->evecsout[i]); CeedChk(ierr); 401 break; 402 case CEED_EVAL_GRAD: 403 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 404 CeedChk(ierr); 405 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 406 CEED_EVAL_GRAD, impl->qvecsout[i], 407 impl->evecsout[i]); CeedChk(ierr); 408 break; 409 case CEED_EVAL_WEIGHT: { 410 // LCOV_EXCL_START 411 Ceed ceed; 412 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 413 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 414 "evaluation mode"); 415 // LCOV_EXCL_STOP 416 break; // Should not occur 417 } 418 case CEED_EVAL_DIV: 419 case CEED_EVAL_CURL: { 420 // LCOV_EXCL_START 421 Ceed ceed; 422 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 423 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 424 // LCOV_EXCL_STOP 425 break; // Not implemented 426 } 427 } 428 // Restrict output block 429 // Get output vector 430 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 431 if (vec == CEED_VECTOR_ACTIVE) 432 vec = outvec; 433 // Restrict 434 ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); 435 CeedChk(ierr); 436 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein], 437 e/blksize, CEED_TRANSPOSE, 438 lmode, impl->evecsout[i], 439 vec, request); CeedChk(ierr); 440 } 441 return 0; 442 } 443 444 // Restore Inputs 445 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields, 446 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 447 bool skipactive, CeedOperator_Opt *impl) { 448 CeedInt ierr; 449 CeedEvalMode emode; 450 451 for (CeedInt i=0; i<numinputfields; i++) { 452 // Skip active inputs 453 if (skipactive) { 454 CeedVector vec; 455 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 456 if (vec == CEED_VECTOR_ACTIVE) 457 continue; 458 } 459 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 460 CeedChk(ierr); 461 if (emode == CEED_EVAL_WEIGHT) { // Skip 462 } else { 463 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 464 (const CeedScalar **) &impl->edata[i]); 465 CeedChk(ierr); 466 } 467 } 468 return 0; 469 } 470 471 // Apply Ceed Operator 472 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec, 473 CeedVector outvec, CeedRequest *request) { 474 int ierr; 475 Ceed ceed; 476 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 477 Ceed_Opt *ceedimpl; 478 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 479 CeedInt blksize = ceedimpl->blksize; 480 CeedOperator_Opt *impl; 481 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 482 CeedInt Q, numinputfields, numoutputfields, numelements; 483 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 484 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 485 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 486 CeedQFunction qf; 487 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 488 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 489 CeedChk(ierr); 490 CeedOperatorField *opinputfields, *opoutputfields; 491 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 492 CeedChk(ierr); 493 CeedQFunctionField *qfinputfields, *qfoutputfields; 494 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 495 CeedChk(ierr); 496 CeedEvalMode emode; 497 CeedVector vec; 498 499 // Setup 500 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 501 502 // Input Evecs and Restriction 503 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 504 opinputfields, invec, impl, request); 505 CeedChk(ierr); 506 507 // Output Lvecs, Evecs, and Qvecs 508 for (CeedInt i=0; i<numoutputfields; i++) { 509 // Zero Lvecs 510 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 511 if (vec == CEED_VECTOR_ACTIVE) { 512 if (!impl->add) { 513 vec = outvec; 514 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 515 } 516 } else { 517 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 518 } 519 // Set Qvec if needed 520 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 521 CeedChk(ierr); 522 if (emode == CEED_EVAL_NONE) { 523 // Set qvec to single block evec 524 ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST, 525 &impl->edata[i + numinputfields]); 526 CeedChk(ierr); 527 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 528 CEED_USE_POINTER, 529 impl->edata[i + numinputfields]); CeedChk(ierr); 530 ierr = CeedVectorRestoreArray(impl->evecsout[i], 531 &impl->edata[i + numinputfields]); 532 CeedChk(ierr); 533 } 534 } 535 impl->add = false; 536 537 // Loop through elements 538 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 539 // Input basis apply 540 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 541 numinputfields, blksize, invec, false, 542 impl, request); CeedChk(ierr); 543 544 // Q function 545 if (!impl->identityqf) { 546 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 547 CeedChk(ierr); 548 } 549 550 // Output basis apply and restrict 551 ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields, 552 blksize, numinputfields, numoutputfields, 553 op, outvec, impl, request); 554 CeedChk(ierr); 555 } 556 557 // Restore input arrays 558 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 559 opinputfields, false, impl); 560 CeedChk(ierr); 561 562 return 0; 563 } 564 565 // Assemble Linear QFunction 566 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op, 567 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 568 int ierr; 569 Ceed ceed; 570 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 571 Ceed_Opt *ceedimpl; 572 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 573 const CeedInt blksize = ceedimpl->blksize; 574 CeedOperator_Opt *impl; 575 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 576 CeedInt Q, numinputfields, numoutputfields, numelements, size; 577 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 578 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 579 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 580 CeedQFunction qf; 581 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 582 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 583 CeedChk(ierr); 584 CeedOperatorField *opinputfields, *opoutputfields; 585 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 586 CeedChk(ierr); 587 CeedQFunctionField *qfinputfields, *qfoutputfields; 588 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 589 CeedChk(ierr); 590 CeedVector vec, lvec; 591 CeedInt numactivein = 0, numactiveout = 0; 592 CeedVector *activein = NULL; 593 CeedScalar *a, *tmp; 594 595 // Setup 596 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 597 598 // Check for identity 599 if (impl->identityqf) 600 // LCOV_EXCL_START 601 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 602 // LCOV_EXCL_STOP 603 604 // Input Evecs and Restriction 605 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 606 opinputfields, NULL, impl, request); 607 CeedChk(ierr); 608 609 // Count number of active input fields 610 for (CeedInt i=0; i<numinputfields; i++) { 611 // Get input vector 612 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 613 // Check if active input 614 if (vec == CEED_VECTOR_ACTIVE) { 615 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 616 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 617 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 618 CeedChk(ierr); 619 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 620 for (CeedInt field=0; field<size; field++) { 621 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 622 CeedChk(ierr); 623 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 624 CEED_USE_POINTER, &tmp[field*Q*blksize]); 625 CeedChk(ierr); 626 } 627 numactivein += size; 628 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 629 } 630 } 631 632 // Count number of active output fields 633 for (CeedInt i=0; i<numoutputfields; i++) { 634 // Get output vector 635 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 636 // Check if active output 637 if (vec == CEED_VECTOR_ACTIVE) { 638 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 639 numactiveout += size; 640 } 641 } 642 643 // Check sizes 644 if (!numactivein || !numactiveout) 645 // LCOV_EXCL_START 646 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 647 "and outputs"); 648 // LCOV_EXCL_STOP 649 650 // Setup lvec 651 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 652 &lvec); CeedChk(ierr); 653 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 654 655 // Create output restriction 656 ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q, 657 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 658 // Create assembled vector 659 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 660 assembled); CeedChk(ierr); 661 662 // Loop through elements 663 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 664 // Input basis apply 665 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 666 numinputfields, blksize, NULL, true, 667 impl, request); CeedChk(ierr); 668 669 // Assemble QFunction 670 for (CeedInt in=0; in<numactivein; in++) { 671 // Set Inputs 672 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 673 if (numactivein > 1) { 674 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 675 0.0); CeedChk(ierr); 676 } 677 // Set Outputs 678 for (CeedInt out=0; out<numoutputfields; out++) { 679 // Get output vector 680 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 681 CeedChk(ierr); 682 // Check if active output 683 if (vec == CEED_VECTOR_ACTIVE) { 684 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 685 CEED_USE_POINTER, a); CeedChk(ierr); 686 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 687 CeedChk(ierr); 688 a += size*Q*blksize; // Advance the pointer by the size of the output 689 } 690 } 691 // Apply QFunction 692 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 693 CeedChk(ierr); 694 } 695 } 696 697 // Un-set output Qvecs to prevent accidental overwrite of Assembled 698 for (CeedInt out=0; out<numoutputfields; out++) { 699 // Get output vector 700 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 701 CeedChk(ierr); 702 // Check if active output 703 if (vec == CEED_VECTOR_ACTIVE) { 704 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 705 NULL); CeedChk(ierr); 706 } 707 } 708 709 // Restore input arrays 710 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 711 opinputfields, true, impl); 712 CeedChk(ierr); 713 714 // Output blocked restriction 715 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 716 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 717 CeedElemRestriction blkrstr; 718 ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize, 719 numelements*Q, 720 numactivein*numactiveout, 721 CEED_MEM_HOST, CEED_COPY_VALUES, 722 NULL, &blkrstr); CeedChk(ierr); 723 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE, 724 lvec, *assembled, request); CeedChk(ierr); 725 726 // Cleanup 727 for (CeedInt i=0; i<numactivein; i++) { 728 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 729 } 730 ierr = CeedFree(&activein); CeedChk(ierr); 731 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 732 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 733 734 return 0; 735 } 736 737 int CeedOperatorCreate_Opt(CeedOperator op) { 738 int ierr; 739 Ceed ceed; 740 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 741 Ceed_Opt *ceedimpl; 742 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 743 CeedInt blksize = ceedimpl->blksize; 744 CeedOperator_Opt *impl; 745 746 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 747 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 748 749 if (blksize == 1 || blksize == 8) { 750 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 751 CeedOperatorAssembleLinearQFunction_Opt); 752 CeedChk(ierr); 753 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 754 CeedOperatorApply_Opt); CeedChk(ierr); 755 } else { 756 // LCOV_EXCL_START 757 return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize); 758 // LCOV_EXCL_STOP 759 } 760 761 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 762 CeedOperatorDestroy_Opt); CeedChk(ierr); 763 return 0; 764 } 765