1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <string.h> 18 #include "ceed-opt.h" 19 #include "../ref/ceed-ref.h" 20 21 static int CeedOperatorDestroy_Opt(CeedOperator op) { 22 int ierr; 23 CeedOperator_Opt *impl; 24 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 25 26 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 27 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 28 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 29 } 30 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 31 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 32 ierr = CeedFree(&impl->edata); CeedChk(ierr); 33 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 34 35 for (CeedInt i=0; i<impl->numein; i++) { 36 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 37 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 38 } 39 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 40 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 41 42 for (CeedInt i=0; i<impl->numeout; i++) { 43 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 44 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 45 } 46 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 47 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 48 49 ierr = CeedFree(&impl); CeedChk(ierr); 50 return 0; 51 } 52 53 /* 54 Setup infields or outfields 55 */ 56 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 57 bool inOrOut, const CeedInt blksize, 58 CeedElemRestriction *blkrestr, 59 CeedVector *fullevecs, CeedVector *evecs, 60 CeedVector *qvecs, CeedInt starte, 61 CeedInt numfields, CeedInt Q) { 62 CeedInt dim, ierr, ncomp, size, P; 63 Ceed ceed; 64 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 65 CeedBasis basis; 66 CeedElemRestriction r; 67 CeedOperatorField *opfields; 68 CeedQFunctionField *qffields; 69 if (inOrOut) { 70 ierr = CeedOperatorGetFields(op, NULL, &opfields); 71 CeedChk(ierr); 72 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 73 CeedChk(ierr); 74 } else { 75 ierr = CeedOperatorGetFields(op, &opfields, NULL); 76 CeedChk(ierr); 77 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 78 CeedChk(ierr); 79 } 80 81 // Loop over fields 82 for (CeedInt i=0; i<numfields; i++) { 83 CeedEvalMode emode; 84 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 85 86 if (emode != CEED_EVAL_WEIGHT) { 87 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 88 CeedChk(ierr); 89 CeedElemRestriction_Ref *data; 90 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 91 Ceed ceed; 92 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 93 CeedInt nelem, elemsize, nnodes; 94 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 95 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 96 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 97 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 98 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 99 blksize, nnodes, ncomp, 100 CEED_MEM_HOST, CEED_COPY_VALUES, 101 data->indices, &blkrestr[i+starte]); 102 CeedChk(ierr); 103 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 104 &fullevecs[i+starte]); 105 CeedChk(ierr); 106 } 107 108 switch(emode) { 109 case CEED_EVAL_NONE: 110 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 111 ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr); 112 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 113 break; 114 case CEED_EVAL_INTERP: 115 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 116 ierr = CeedElemRestrictionGetElementSize(r, &P); 117 CeedChk(ierr); 118 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 119 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 120 break; 121 case CEED_EVAL_GRAD: 122 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 123 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 124 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 125 ierr = CeedElemRestrictionGetElementSize(r, &P); 126 CeedChk(ierr); 127 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 128 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 129 break; 130 case CEED_EVAL_WEIGHT: // Only on input fields 131 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 132 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 133 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 134 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 135 CeedChk(ierr); 136 137 break; 138 case CEED_EVAL_DIV: 139 break; // Not implimented 140 case CEED_EVAL_CURL: 141 break; // Not implimented 142 } 143 } 144 return 0; 145 } 146 147 /* 148 CeedOperator needs to connect all the named fields (be they active or passive) 149 to the named inputs and outputs of its CeedQFunction. 150 */ 151 static int CeedOperatorSetup_Opt(CeedOperator op) { 152 int ierr; 153 bool setupdone; 154 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 155 if (setupdone) return 0; 156 Ceed ceed; 157 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 158 Ceed_Opt *ceedimpl; 159 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 160 const CeedInt blksize = ceedimpl->blksize; 161 CeedOperator_Opt *impl; 162 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 163 CeedQFunction qf; 164 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 165 CeedInt Q, numinputfields, numoutputfields; 166 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 167 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 168 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 169 CeedChk(ierr); 170 CeedOperatorField *opinputfields, *opoutputfields; 171 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 172 CeedChk(ierr); 173 CeedQFunctionField *qfinputfields, *qfoutputfields; 174 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 175 CeedChk(ierr); 176 177 // Allocate 178 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 179 CeedChk(ierr); 180 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 181 CeedChk(ierr); 182 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 183 CeedChk(ierr); 184 185 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 186 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 187 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 188 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 189 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 190 191 impl->numein = numinputfields; impl->numeout = numoutputfields; 192 193 // Set up infield and outfield pointer arrays 194 // Infields 195 ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr, 196 impl->evecs, impl->evecsin, 197 impl->qvecsin, 0, 198 numinputfields, Q); 199 CeedChk(ierr); 200 // Outfields 201 ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr, 202 impl->evecs, impl->evecsout, 203 impl->qvecsout, numinputfields, 204 numoutputfields, Q); 205 CeedChk(ierr); 206 207 // Identity QFunctions 208 if (impl->identityqf) { 209 CeedEvalMode inmode, outmode; 210 CeedQFunctionField *infields, *outfields; 211 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 212 213 for (CeedInt i=0; i<numinputfields; i++) { 214 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 215 CeedChk(ierr); 216 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 217 CeedChk(ierr); 218 219 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 220 impl->qvecsout[i] = impl->qvecsin[i]; 221 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 222 } 223 } 224 225 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 226 227 return 0; 228 } 229 230 // Setup Input fields 231 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields, 232 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 233 CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) { 234 CeedInt ierr; 235 CeedEvalMode emode; 236 CeedVector vec; 237 CeedTransposeMode lmode; 238 uint64_t state; 239 240 for (CeedInt i=0; i<numinputfields; i++) { 241 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 242 CeedChk(ierr); 243 if (emode == CEED_EVAL_WEIGHT) { // Skip 244 } else { 245 // Get input vector 246 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 247 if (vec != CEED_VECTOR_ACTIVE) { 248 // Restrict 249 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 250 if (state != impl->inputstate[i]) { 251 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 252 CeedChk(ierr); 253 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 254 lmode, vec, impl->evecs[i], request); 255 CeedChk(ierr); 256 impl->inputstate[i] = state; 257 } 258 } else { 259 // Set Qvec for CEED_EVAL_NONE 260 if (emode == CEED_EVAL_NONE) { 261 ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST, 262 &impl->edata[i]); CeedChk(ierr); 263 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 264 CEED_USE_POINTER, 265 impl->edata[i]); CeedChk(ierr); 266 ierr = CeedVectorRestoreArray(impl->evecsin[i], 267 &impl->edata[i]); CeedChk(ierr); 268 } 269 } 270 // Get evec 271 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 272 (const CeedScalar **) &impl->edata[i]); 273 CeedChk(ierr); 274 } 275 } 276 return 0; 277 } 278 279 // Input basis action 280 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 281 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 282 CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive, 283 CeedOperator_Opt *impl, CeedRequest *request) { 284 CeedInt ierr; 285 CeedInt dim, elemsize, size; 286 CeedElemRestriction Erestrict; 287 CeedEvalMode emode; 288 CeedBasis basis; 289 CeedVector vec; 290 CeedTransposeMode lmode; 291 292 for (CeedInt i=0; i<numinputfields; i++) { 293 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 294 // Skip active input 295 if (skipactive) { 296 if (vec == CEED_VECTOR_ACTIVE) 297 continue; 298 } 299 300 CeedInt activein = 0; 301 // Get elemsize, emode, size 302 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 303 CeedChk(ierr); 304 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 305 CeedChk(ierr); 306 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 307 CeedChk(ierr); 308 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 309 // Restrict block active input 310 if (vec == CEED_VECTOR_ACTIVE) { 311 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 312 CeedChk(ierr); 313 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize, 314 CEED_NOTRANSPOSE, lmode, invec, 315 impl->evecsin[i], request); 316 CeedChk(ierr); 317 activein = 1; 318 } 319 // Basis action 320 switch(emode) { 321 case CEED_EVAL_NONE: 322 if (!activein) { 323 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 324 CEED_USE_POINTER, 325 &impl->edata[i][e*Q*size]); CeedChk(ierr); 326 } 327 break; 328 case CEED_EVAL_INTERP: 329 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 330 CeedChk(ierr); 331 if (!activein) { 332 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 333 CEED_USE_POINTER, 334 &impl->edata[i][e*elemsize*size]); 335 CeedChk(ierr); 336 } 337 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 338 CEED_EVAL_INTERP, impl->evecsin[i], 339 impl->qvecsin[i]); CeedChk(ierr); 340 break; 341 case CEED_EVAL_GRAD: 342 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 343 CeedChk(ierr); 344 if (!activein) { 345 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 346 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 347 CEED_USE_POINTER, 348 &impl->edata[i][e*elemsize*size/dim]); 349 CeedChk(ierr); 350 } 351 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 352 CEED_EVAL_GRAD, impl->evecsin[i], 353 impl->qvecsin[i]); CeedChk(ierr); 354 break; 355 case CEED_EVAL_WEIGHT: 356 break; // No action 357 case CEED_EVAL_DIV: 358 case CEED_EVAL_CURL: { 359 // LCOV_EXCL_START 360 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 361 CeedChk(ierr); 362 Ceed ceed; 363 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 364 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 365 // LCOV_EXCL_STOP 366 break; // Not implemented 367 } 368 } 369 } 370 return 0; 371 } 372 373 // Output basis action 374 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 375 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 376 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 377 CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl, 378 CeedRequest *request) { 379 CeedInt ierr; 380 CeedElemRestriction Erestrict; 381 CeedEvalMode emode; 382 CeedBasis basis; 383 CeedVector vec; 384 CeedTransposeMode lmode; 385 386 for (CeedInt i=0; i<numoutputfields; i++) { 387 // Get elemsize, emode, size 388 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 389 CeedChk(ierr); 390 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 391 CeedChk(ierr); 392 // Basis action 393 switch(emode) { 394 case CEED_EVAL_NONE: 395 break; // No action 396 case CEED_EVAL_INTERP: 397 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 398 CeedChk(ierr); 399 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 400 CEED_EVAL_INTERP, impl->qvecsout[i], 401 impl->evecsout[i]); CeedChk(ierr); 402 break; 403 case CEED_EVAL_GRAD: 404 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 405 CeedChk(ierr); 406 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 407 CEED_EVAL_GRAD, impl->qvecsout[i], 408 impl->evecsout[i]); CeedChk(ierr); 409 break; 410 case CEED_EVAL_WEIGHT: { 411 // LCOV_EXCL_START 412 Ceed ceed; 413 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 414 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 415 "evaluation mode"); 416 // LCOV_EXCL_STOP 417 break; // Should not occur 418 } 419 case CEED_EVAL_DIV: 420 case CEED_EVAL_CURL: { 421 // LCOV_EXCL_START 422 Ceed ceed; 423 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 424 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 425 // LCOV_EXCL_STOP 426 break; // Not implemented 427 } 428 } 429 // Restrict output block 430 // Get output vector 431 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 432 if (vec == CEED_VECTOR_ACTIVE) 433 vec = outvec; 434 // Restrict 435 ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); 436 CeedChk(ierr); 437 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein], 438 e/blksize, CEED_TRANSPOSE, 439 lmode, impl->evecsout[i], 440 vec, request); CeedChk(ierr); 441 } 442 return 0; 443 } 444 445 // Restore Inputs 446 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields, 447 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 448 bool skipactive, CeedOperator_Opt *impl) { 449 CeedInt ierr; 450 CeedEvalMode emode; 451 452 for (CeedInt i=0; i<numinputfields; i++) { 453 // Skip active inputs 454 if (skipactive) { 455 CeedVector vec; 456 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 457 if (vec == CEED_VECTOR_ACTIVE) 458 continue; 459 } 460 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 461 CeedChk(ierr); 462 if (emode == CEED_EVAL_WEIGHT) { // Skip 463 } else { 464 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 465 (const CeedScalar **) &impl->edata[i]); 466 CeedChk(ierr); 467 } 468 } 469 return 0; 470 } 471 472 // Apply Ceed Operator 473 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec, 474 CeedVector outvec, CeedRequest *request) { 475 int ierr; 476 Ceed ceed; 477 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 478 Ceed_Opt *ceedimpl; 479 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 480 CeedInt blksize = ceedimpl->blksize; 481 CeedOperator_Opt *impl; 482 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 483 CeedInt Q, numinputfields, numoutputfields, numelements; 484 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 485 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 486 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 487 CeedQFunction qf; 488 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 489 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 490 CeedChk(ierr); 491 CeedOperatorField *opinputfields, *opoutputfields; 492 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 493 CeedChk(ierr); 494 CeedQFunctionField *qfinputfields, *qfoutputfields; 495 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 496 CeedChk(ierr); 497 CeedEvalMode emode; 498 499 // Setup 500 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 501 502 // Input Evecs and Restriction 503 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 504 opinputfields, invec, impl, request); 505 CeedChk(ierr); 506 507 // Output Lvecs, Evecs, and Qvecs 508 for (CeedInt i=0; i<numoutputfields; i++) { 509 // Set Qvec if needed 510 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 511 CeedChk(ierr); 512 if (emode == CEED_EVAL_NONE) { 513 // Set qvec to single block evec 514 ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST, 515 &impl->edata[i + numinputfields]); 516 CeedChk(ierr); 517 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 518 CEED_USE_POINTER, 519 impl->edata[i + numinputfields]); CeedChk(ierr); 520 ierr = CeedVectorRestoreArray(impl->evecsout[i], 521 &impl->edata[i + numinputfields]); 522 CeedChk(ierr); 523 } 524 } 525 526 // Loop through elements 527 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 528 // Input basis apply 529 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 530 numinputfields, blksize, invec, false, 531 impl, request); CeedChk(ierr); 532 533 // Q function 534 if (!impl->identityqf) { 535 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 536 CeedChk(ierr); 537 } 538 539 // Output basis apply and restrict 540 ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields, 541 blksize, numinputfields, numoutputfields, 542 op, outvec, impl, request); 543 CeedChk(ierr); 544 } 545 546 // Restore input arrays 547 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 548 opinputfields, false, impl); 549 CeedChk(ierr); 550 551 return 0; 552 } 553 554 // Assemble Linear QFunction 555 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op, 556 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 557 int ierr; 558 Ceed ceed; 559 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 560 Ceed_Opt *ceedimpl; 561 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 562 const CeedInt blksize = ceedimpl->blksize; 563 CeedOperator_Opt *impl; 564 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 565 CeedInt Q, numinputfields, numoutputfields, numelements, size; 566 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 567 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 568 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 569 CeedQFunction qf; 570 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 571 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 572 CeedChk(ierr); 573 CeedOperatorField *opinputfields, *opoutputfields; 574 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 575 CeedChk(ierr); 576 CeedQFunctionField *qfinputfields, *qfoutputfields; 577 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 578 CeedChk(ierr); 579 CeedVector vec, lvec; 580 CeedInt numactivein = 0, numactiveout = 0; 581 CeedVector *activein = NULL; 582 CeedScalar *a, *tmp; 583 584 // Setup 585 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 586 587 // Check for identity 588 if (impl->identityqf) 589 // LCOV_EXCL_START 590 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 591 // LCOV_EXCL_STOP 592 593 // Input Evecs and Restriction 594 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 595 opinputfields, NULL, impl, request); 596 CeedChk(ierr); 597 598 // Count number of active input fields 599 for (CeedInt i=0; i<numinputfields; i++) { 600 // Get input vector 601 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 602 // Check if active input 603 if (vec == CEED_VECTOR_ACTIVE) { 604 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 605 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 606 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 607 CeedChk(ierr); 608 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 609 for (CeedInt field=0; field<size; field++) { 610 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 611 CeedChk(ierr); 612 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 613 CEED_USE_POINTER, &tmp[field*Q*blksize]); 614 CeedChk(ierr); 615 } 616 numactivein += size; 617 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 618 } 619 } 620 621 // Count number of active output fields 622 for (CeedInt i=0; i<numoutputfields; i++) { 623 // Get output vector 624 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 625 // Check if active output 626 if (vec == CEED_VECTOR_ACTIVE) { 627 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 628 numactiveout += size; 629 } 630 } 631 632 // Check sizes 633 if (!numactivein || !numactiveout) 634 // LCOV_EXCL_START 635 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 636 "and outputs"); 637 // LCOV_EXCL_STOP 638 639 // Setup lvec 640 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 641 &lvec); CeedChk(ierr); 642 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 643 644 // Create output restriction 645 ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q, 646 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 647 // Create assembled vector 648 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 649 assembled); CeedChk(ierr); 650 651 // Loop through elements 652 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 653 // Input basis apply 654 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 655 numinputfields, blksize, NULL, true, 656 impl, request); CeedChk(ierr); 657 658 // Assemble QFunction 659 for (CeedInt in=0; in<numactivein; in++) { 660 // Set Inputs 661 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 662 if (numactivein > 1) { 663 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 664 0.0); CeedChk(ierr); 665 } 666 // Set Outputs 667 for (CeedInt out=0; out<numoutputfields; out++) { 668 // Get output vector 669 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 670 CeedChk(ierr); 671 // Check if active output 672 if (vec == CEED_VECTOR_ACTIVE) { 673 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 674 CEED_USE_POINTER, a); CeedChk(ierr); 675 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 676 CeedChk(ierr); 677 a += size*Q*blksize; // Advance the pointer by the size of the output 678 } 679 } 680 // Apply QFunction 681 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 682 CeedChk(ierr); 683 } 684 } 685 686 // Un-set output Qvecs to prevent accidental overwrite of Assembled 687 for (CeedInt out=0; out<numoutputfields; out++) { 688 // Get output vector 689 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 690 CeedChk(ierr); 691 // Check if active output 692 if (vec == CEED_VECTOR_ACTIVE) { 693 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 694 NULL); CeedChk(ierr); 695 } 696 } 697 698 // Restore input arrays 699 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 700 opinputfields, true, impl); 701 CeedChk(ierr); 702 703 // Output blocked restriction 704 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 705 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 706 CeedElemRestriction blkrstr; 707 ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize, 708 numelements*Q, 709 numactivein*numactiveout, 710 CEED_MEM_HOST, CEED_COPY_VALUES, 711 NULL, &blkrstr); CeedChk(ierr); 712 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE, 713 lvec, *assembled, request); CeedChk(ierr); 714 715 // Cleanup 716 for (CeedInt i=0; i<numactivein; i++) { 717 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 718 } 719 ierr = CeedFree(&activein); CeedChk(ierr); 720 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 721 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 722 723 return 0; 724 } 725 726 int CeedOperatorCreate_Opt(CeedOperator op) { 727 int ierr; 728 Ceed ceed; 729 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 730 Ceed_Opt *ceedimpl; 731 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 732 CeedInt blksize = ceedimpl->blksize; 733 CeedOperator_Opt *impl; 734 735 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 736 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 737 738 if (blksize == 1 || blksize == 8) { 739 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 740 CeedOperatorAssembleLinearQFunction_Opt); 741 CeedChk(ierr); 742 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 743 CeedOperatorApply_Opt); CeedChk(ierr); 744 } else { 745 // LCOV_EXCL_START 746 return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize); 747 // LCOV_EXCL_STOP 748 } 749 750 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 751 CeedOperatorDestroy_Opt); CeedChk(ierr); 752 return 0; 753 } 754