1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <string.h> 18 #include "ceed-opt.h" 19 #include "../ref/ceed-ref.h" 20 21 static int CeedOperatorDestroy_Opt(CeedOperator op) { 22 int ierr; 23 CeedOperator_Opt *impl; 24 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 25 26 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 27 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 28 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 29 } 30 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 31 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 32 ierr = CeedFree(&impl->edata); CeedChk(ierr); 33 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 34 35 for (CeedInt i=0; i<impl->numein; i++) { 36 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 37 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 38 } 39 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 40 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 41 42 for (CeedInt i=0; i<impl->numeout; i++) { 43 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 44 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 45 } 46 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 47 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 48 49 ierr = CeedFree(&impl); CeedChk(ierr); 50 return 0; 51 } 52 53 /* 54 Setup infields or outfields 55 */ 56 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 57 bool inOrOut, const CeedInt blksize, 58 CeedElemRestriction *blkrestr, 59 CeedVector *fullevecs, CeedVector *evecs, 60 CeedVector *qvecs, CeedInt starte, 61 CeedInt numfields, CeedInt Q) { 62 CeedInt dim, ierr, ncomp, size, P; 63 Ceed ceed; 64 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 65 CeedBasis basis; 66 CeedElemRestriction r; 67 CeedOperatorField *opfields; 68 CeedQFunctionField *qffields; 69 if (inOrOut) { 70 ierr = CeedOperatorGetFields(op, NULL, &opfields); 71 CeedChk(ierr); 72 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 73 CeedChk(ierr); 74 } else { 75 ierr = CeedOperatorGetFields(op, &opfields, NULL); 76 CeedChk(ierr); 77 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 78 CeedChk(ierr); 79 } 80 81 // Loop over fields 82 for (CeedInt i=0; i<numfields; i++) { 83 CeedEvalMode emode; 84 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 85 86 if (emode != CEED_EVAL_WEIGHT) { 87 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 88 CeedChk(ierr); 89 CeedElemRestriction_Ref *data; 90 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 91 Ceed ceed; 92 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 93 CeedInt nelem, elemsize, nnodes; 94 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 95 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 96 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 97 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 98 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 99 blksize, nnodes, ncomp, 100 CEED_MEM_HOST, CEED_COPY_VALUES, 101 data->indices, &blkrestr[i+starte]); 102 CeedChk(ierr); 103 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 104 &fullevecs[i+starte]); 105 CeedChk(ierr); 106 } 107 108 switch(emode) { 109 case CEED_EVAL_NONE: 110 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 111 ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr); 112 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 113 break; 114 case CEED_EVAL_INTERP: 115 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 116 ierr = CeedElemRestrictionGetElementSize(r, &P); 117 CeedChk(ierr); 118 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 119 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 120 break; 121 case CEED_EVAL_GRAD: 122 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 123 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 124 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 125 ierr = CeedElemRestrictionGetElementSize(r, &P); 126 CeedChk(ierr); 127 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 128 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 129 break; 130 case CEED_EVAL_WEIGHT: // Only on input fields 131 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 132 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 133 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 134 CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr); 135 136 break; 137 case CEED_EVAL_DIV: 138 break; // Not implimented 139 case CEED_EVAL_CURL: 140 break; // Not implimented 141 } 142 } 143 return 0; 144 } 145 146 /* 147 CeedOperator needs to connect all the named fields (be they active or passive) 148 to the named inputs and outputs of its CeedQFunction. 149 */ 150 static int CeedOperatorSetup_Opt(CeedOperator op) { 151 int ierr; 152 bool setupdone; 153 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 154 if (setupdone) return 0; 155 Ceed ceed; 156 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 157 Ceed_Opt *ceedimpl; 158 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 159 const CeedInt blksize = ceedimpl->blksize; 160 CeedOperator_Opt *impl; 161 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 162 CeedQFunction qf; 163 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 164 CeedInt Q, numinputfields, numoutputfields; 165 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 166 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 167 CeedChk(ierr); 168 CeedOperatorField *opinputfields, *opoutputfields; 169 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 170 CeedChk(ierr); 171 CeedQFunctionField *qfinputfields, *qfoutputfields; 172 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 173 CeedChk(ierr); 174 175 // Allocate 176 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 177 CeedChk(ierr); 178 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 179 CeedChk(ierr); 180 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 181 CeedChk(ierr); 182 183 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 184 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 185 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 186 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 187 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 188 189 impl->numein = numinputfields; impl->numeout = numoutputfields; 190 191 // Set up infield and outfield pointer arrays 192 // Infields 193 ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr, 194 impl->evecs, impl->evecsin, 195 impl->qvecsin, 0, 196 numinputfields, Q); 197 CeedChk(ierr); 198 // Outfields 199 ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr, 200 impl->evecs, impl->evecsout, 201 impl->qvecsout, numinputfields, 202 numoutputfields, Q); 203 CeedChk(ierr); 204 205 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 206 207 return 0; 208 } 209 210 211 // Setup Input fields 212 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields, 213 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 214 CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) { 215 CeedInt ierr; 216 CeedEvalMode emode; 217 CeedVector vec; 218 CeedTransposeMode lmode; 219 uint64_t state; 220 221 for (CeedInt i=0; i<numinputfields; i++) { 222 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 223 CeedChk(ierr); 224 if (emode == CEED_EVAL_WEIGHT) { // Skip 225 } else { 226 // Get input vector 227 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 228 if (vec != CEED_VECTOR_ACTIVE) { 229 // Restrict 230 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 231 if (state != impl->inputstate[i]) { 232 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 233 CeedChk(ierr); 234 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 235 lmode, vec, impl->evecs[i], request); 236 CeedChk(ierr); 237 impl->inputstate[i] = state; 238 } 239 } else { 240 // Set Qvec for CEED_EVAL_NONE 241 if (emode == CEED_EVAL_NONE) { 242 ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST, 243 &impl->edata[i]); CeedChk(ierr); 244 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 245 CEED_USE_POINTER, 246 impl->edata[i]); CeedChk(ierr); 247 ierr = CeedVectorRestoreArray(impl->evecsin[i], 248 &impl->edata[i]); CeedChk(ierr); 249 } 250 } 251 // Get evec 252 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 253 (const CeedScalar **) &impl->edata[i]); 254 CeedChk(ierr); 255 } 256 } 257 return 0; 258 } 259 260 // Input basis action 261 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 262 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 263 CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive, 264 CeedOperator_Opt *impl, CeedRequest *request) { 265 CeedInt ierr; 266 CeedInt dim, elemsize, size; 267 CeedElemRestriction Erestrict; 268 CeedEvalMode emode; 269 CeedBasis basis; 270 CeedVector vec; 271 CeedTransposeMode lmode; 272 273 for (CeedInt i=0; i<numinputfields; i++) { 274 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 275 // Skip active input 276 if (skipactive) { 277 if (vec == CEED_VECTOR_ACTIVE) 278 continue; 279 } 280 281 CeedInt activein = 0; 282 // Get elemsize, emode, size 283 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 284 CeedChk(ierr); 285 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 286 CeedChk(ierr); 287 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 288 CeedChk(ierr); 289 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 290 // Restrict block active input 291 if (vec == CEED_VECTOR_ACTIVE) { 292 ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); 293 CeedChk(ierr); 294 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize, 295 CEED_NOTRANSPOSE, lmode, invec, 296 impl->evecsin[i], request); 297 CeedChk(ierr); 298 activein = 1; 299 } 300 // Basis action 301 switch(emode) { 302 case CEED_EVAL_NONE: 303 if (!activein) { 304 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 305 CEED_USE_POINTER, 306 &impl->edata[i][e*Q*size]); CeedChk(ierr); 307 } 308 break; 309 case CEED_EVAL_INTERP: 310 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 311 CeedChk(ierr); 312 if (!activein) { 313 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 314 CEED_USE_POINTER, 315 &impl->edata[i][e*elemsize*size]); 316 CeedChk(ierr); 317 } 318 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 319 CEED_EVAL_INTERP, impl->evecsin[i], 320 impl->qvecsin[i]); CeedChk(ierr); 321 break; 322 case CEED_EVAL_GRAD: 323 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 324 CeedChk(ierr); 325 if (!activein) { 326 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 327 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 328 CEED_USE_POINTER, 329 &impl->edata[i][e*elemsize*size/dim]); 330 CeedChk(ierr); 331 } 332 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 333 CEED_EVAL_GRAD, impl->evecsin[i], 334 impl->qvecsin[i]); CeedChk(ierr); 335 break; 336 case CEED_EVAL_WEIGHT: 337 break; // No action 338 case CEED_EVAL_DIV: 339 case CEED_EVAL_CURL: { 340 // LCOV_EXCL_START 341 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 342 CeedChk(ierr); 343 Ceed ceed; 344 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 345 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 346 // LCOV_EXCL_STOP 347 break; // Not implemented 348 } 349 } 350 } 351 return 0; 352 } 353 354 // Output basis action 355 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 356 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 357 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 358 CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl, 359 CeedRequest *request) { 360 CeedInt ierr; 361 CeedElemRestriction Erestrict; 362 CeedEvalMode emode; 363 CeedBasis basis; 364 CeedVector vec; 365 CeedTransposeMode lmode; 366 367 for (CeedInt i=0; i<numoutputfields; i++) { 368 // Get elemsize, emode, size 369 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 370 CeedChk(ierr); 371 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 372 CeedChk(ierr); 373 // Basis action 374 switch(emode) { 375 case CEED_EVAL_NONE: 376 break; // No action 377 case CEED_EVAL_INTERP: 378 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 379 CeedChk(ierr); 380 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 381 CEED_EVAL_INTERP, impl->qvecsout[i], 382 impl->evecsout[i]); CeedChk(ierr); 383 break; 384 case CEED_EVAL_GRAD: 385 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 386 CeedChk(ierr); 387 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 388 CEED_EVAL_GRAD, impl->qvecsout[i], 389 impl->evecsout[i]); CeedChk(ierr); 390 break; 391 case CEED_EVAL_WEIGHT: { 392 // LCOV_EXCL_START 393 Ceed ceed; 394 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 395 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 396 "evaluation mode"); 397 // LCOV_EXCL_STOP 398 break; // Should not occur 399 } 400 case CEED_EVAL_DIV: 401 case CEED_EVAL_CURL: { 402 // LCOV_EXCL_START 403 Ceed ceed; 404 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 405 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 406 // LCOV_EXCL_STOP 407 break; // Not implemented 408 } 409 } 410 // Restrict output block 411 // Get output vector 412 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 413 if (vec == CEED_VECTOR_ACTIVE) 414 vec = outvec; 415 // Restrict 416 ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); 417 CeedChk(ierr); 418 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein], 419 e/blksize, CEED_TRANSPOSE, 420 lmode, impl->evecsout[i], 421 vec, request); CeedChk(ierr); 422 } 423 return 0; 424 } 425 426 427 428 // Restore Inputs 429 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields, 430 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 431 bool skipactive, CeedOperator_Opt *impl) { 432 CeedInt ierr; 433 CeedEvalMode emode; 434 435 for (CeedInt i=0; i<numinputfields; i++) { 436 // Skip active inputs 437 if (skipactive) { 438 CeedVector vec; 439 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 440 if (vec == CEED_VECTOR_ACTIVE) 441 continue; 442 } 443 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 444 CeedChk(ierr); 445 if (emode == CEED_EVAL_WEIGHT) { // Skip 446 } else { 447 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 448 (const CeedScalar **) &impl->edata[i]); 449 CeedChk(ierr); 450 } 451 } 452 return 0; 453 } 454 455 // Apply Ceed Operator 456 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec, 457 CeedVector outvec, CeedRequest *request) { 458 int ierr; 459 Ceed ceed; 460 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 461 Ceed_Opt *ceedimpl; 462 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 463 CeedInt blksize = ceedimpl->blksize; 464 CeedOperator_Opt *impl; 465 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 466 CeedInt Q, numinputfields, numoutputfields, numelements; 467 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 468 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 469 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 470 CeedQFunction qf; 471 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 472 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 473 CeedChk(ierr); 474 CeedOperatorField *opinputfields, *opoutputfields; 475 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 476 CeedChk(ierr); 477 CeedQFunctionField *qfinputfields, *qfoutputfields; 478 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 479 CeedChk(ierr); 480 CeedEvalMode emode; 481 CeedVector vec; 482 483 // Setup 484 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 485 486 // Input Evecs and Restriction 487 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 488 opinputfields, invec, impl, request); CeedChk(ierr); 489 490 // Output Lvecs, Evecs, and Qvecs 491 for (CeedInt i=0; i<numoutputfields; i++) { 492 // Zero Lvecs 493 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 494 if (vec == CEED_VECTOR_ACTIVE) { 495 if (!impl->add) { 496 vec = outvec; 497 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 498 } 499 } else { 500 ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 501 } 502 // Set Qvec if needed 503 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 504 CeedChk(ierr); 505 if (emode == CEED_EVAL_NONE) { 506 // Set qvec to single block evec 507 ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST, 508 &impl->edata[i + numinputfields]); 509 CeedChk(ierr); 510 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 511 CEED_USE_POINTER, 512 impl->edata[i + numinputfields]); CeedChk(ierr); 513 ierr = CeedVectorRestoreArray(impl->evecsout[i], 514 &impl->edata[i + numinputfields]); 515 CeedChk(ierr); 516 } 517 } 518 impl->add = false; 519 520 // Loop through elements 521 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 522 // Input basis apply 523 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 524 numinputfields, blksize, invec, false, 525 impl, request); CeedChk(ierr); 526 527 // Q function 528 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 529 CeedChk(ierr); 530 531 // Output basis apply and restrict 532 ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields, 533 blksize, numinputfields, numoutputfields, 534 op, outvec, impl, request); 535 CeedChk(ierr); 536 } 537 538 // Restore input arrays 539 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 540 opinputfields, false, impl); 541 CeedChk(ierr); 542 543 return 0; 544 } 545 546 // Assemble Linear QFunction 547 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op, 548 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 549 int ierr; 550 Ceed ceed; 551 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 552 Ceed_Opt *ceedimpl; 553 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 554 const CeedInt blksize = ceedimpl->blksize; 555 CeedOperator_Opt *impl; 556 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 557 CeedInt Q, numinputfields, numoutputfields, numelements, size; 558 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 559 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 560 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 561 CeedQFunction qf; 562 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 563 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 564 CeedChk(ierr); 565 CeedOperatorField *opinputfields, *opoutputfields; 566 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 567 CeedChk(ierr); 568 CeedQFunctionField *qfinputfields, *qfoutputfields; 569 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 570 CeedChk(ierr); 571 CeedVector vec, lvec; 572 CeedInt numactivein = 0, numactiveout = 0; 573 CeedVector *activein = NULL; 574 CeedScalar *a, *tmp; 575 576 // Setup 577 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 578 579 // Input Evecs and Restriction 580 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 581 opinputfields, NULL, impl, request); 582 CeedChk(ierr); 583 584 // Count number of active input fields 585 for (CeedInt i=0; i<numinputfields; i++) { 586 // Get input vector 587 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 588 // Check if active input 589 if (vec == CEED_VECTOR_ACTIVE) { 590 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 591 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 592 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 593 CeedChk(ierr); 594 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 595 for (CeedInt field=0; field<size; field++) { 596 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 597 CeedChk(ierr); 598 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 599 CEED_USE_POINTER, &tmp[field*Q*blksize]); 600 CeedChk(ierr); 601 } 602 numactivein += size; 603 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 604 } 605 } 606 607 // Count number of active output fields 608 for (CeedInt i=0; i<numoutputfields; i++) { 609 // Get output vector 610 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 611 // Check if active output 612 if (vec == CEED_VECTOR_ACTIVE) { 613 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 614 numactiveout += size; 615 } 616 } 617 618 // Check sizes 619 if (!numactivein || !numactiveout) 620 // LCOV_EXCL_START 621 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 622 "and outputs"); 623 // LCOV_EXCL_STOP 624 625 // Setup lvec 626 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 627 &lvec); CeedChk(ierr); 628 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 629 630 // Create output restriction 631 ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q, 632 numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr); 633 // Create assembled vector 634 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 635 assembled); CeedChk(ierr); 636 637 // Loop through elements 638 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 639 // Input basis apply 640 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 641 numinputfields, blksize, NULL, true, 642 impl, request); CeedChk(ierr); 643 644 // Assemble QFunction 645 for (CeedInt in=0; in<numactivein; in++) { 646 // Set Inputs 647 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 648 if (numactivein > 1) { 649 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 650 0.0); CeedChk(ierr); 651 } 652 // Set Outputs 653 for (CeedInt out=0; out<numoutputfields; out++) { 654 // Get output vector 655 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 656 CeedChk(ierr); 657 // Check if active output 658 if (vec == CEED_VECTOR_ACTIVE) { 659 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 660 CEED_USE_POINTER, a); CeedChk(ierr); 661 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 662 CeedChk(ierr); 663 a += size*Q*blksize; // Advance the pointer by the size of the output 664 } 665 } 666 // Apply QFunction 667 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 668 CeedChk(ierr); 669 } 670 } 671 672 // Un-set output Qvecs to prevent accidental overwrite of Assembled 673 for (CeedInt out=0; out<numoutputfields; out++) { 674 // Get output vector 675 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 676 CeedChk(ierr); 677 // Check if active output 678 if (vec == CEED_VECTOR_ACTIVE) { 679 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 680 NULL); CeedChk(ierr); 681 } 682 } 683 684 // Restore input arrays 685 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 686 opinputfields, true, impl); 687 CeedChk(ierr); 688 689 // Output blocked restriction 690 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 691 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 692 CeedElemRestriction blkrstr; 693 ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize, 694 numelements*Q, 695 numactivein*numactiveout, 696 CEED_MEM_HOST, CEED_COPY_VALUES, 697 NULL, &blkrstr); CeedChk(ierr); 698 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE, 699 lvec, *assembled, request); CeedChk(ierr); 700 701 // Cleanup 702 for (CeedInt i=0; i<numactivein; i++) { 703 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 704 } 705 ierr = CeedFree(&activein); CeedChk(ierr); 706 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 707 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 708 709 return 0; 710 } 711 712 int CeedOperatorCreate_Opt(CeedOperator op) { 713 int ierr; 714 Ceed ceed; 715 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 716 Ceed_Opt *ceedimpl; 717 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 718 CeedInt blksize = ceedimpl->blksize; 719 CeedOperator_Opt *impl; 720 721 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 722 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 723 724 if (blksize == 1 || blksize == 8) { 725 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 726 CeedOperatorAssembleLinearQFunction_Opt); 727 CeedChk(ierr); 728 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 729 CeedOperatorApply_Opt); CeedChk(ierr); 730 } else { 731 // LCOV_EXCL_START 732 return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize); 733 // LCOV_EXCL_STOP 734 } 735 736 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 737 CeedOperatorDestroy_Opt); CeedChk(ierr); 738 return 0; 739 } 740