1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <string.h> 18 #include "ceed-opt.h" 19 #include "../ref/ceed-ref.h" 20 21 //------------------------------------------------------------------------------ 22 // Setup Input/Output Fields 23 //------------------------------------------------------------------------------ 24 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 25 bool inOrOut, const CeedInt blksize, 26 CeedElemRestriction *blkrestr, 27 CeedVector *fullevecs, CeedVector *evecs, 28 CeedVector *qvecs, CeedInt starte, 29 CeedInt numfields, CeedInt Q) { 30 CeedInt dim, ierr, ncomp, size, P; 31 Ceed ceed; 32 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 33 CeedBasis basis; 34 CeedElemRestriction r; 35 CeedOperatorField *opfields; 36 CeedQFunctionField *qffields; 37 if (inOrOut) { 38 ierr = CeedOperatorGetFields(op, NULL, &opfields); 39 CeedChk(ierr); 40 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 41 CeedChk(ierr); 42 } else { 43 ierr = CeedOperatorGetFields(op, &opfields, NULL); 44 CeedChk(ierr); 45 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 46 CeedChk(ierr); 47 } 48 49 // Loop over fields 50 for (CeedInt i=0; i<numfields; i++) { 51 CeedEvalMode emode; 52 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 53 54 if (emode != CEED_EVAL_WEIGHT) { 55 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 56 CeedChk(ierr); 57 CeedElemRestriction_Ref *data; 58 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 59 Ceed ceed; 60 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 61 CeedInt nelem, elemsize, lsize, compstride; 62 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 63 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 64 ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr); 65 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 66 if (data->offsets) { 67 ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr); 68 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 69 blksize, ncomp, compstride, 70 lsize, CEED_MEM_HOST, 71 CEED_COPY_VALUES, data->offsets, 72 &blkrestr[i+starte]); 73 CeedChk(ierr); 74 } else { 75 CeedInt strides[3]; 76 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 77 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize, 78 blksize, ncomp, lsize, strides, &blkrestr[i+starte]); 79 CeedChk(ierr); 80 } 81 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 82 &fullevecs[i+starte]); 83 CeedChk(ierr); 84 } 85 86 switch(emode) { 87 case CEED_EVAL_NONE: 88 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 89 ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr); 90 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 91 break; 92 case CEED_EVAL_INTERP: 93 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 94 ierr = CeedElemRestrictionGetElementSize(r, &P); 95 CeedChk(ierr); 96 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 97 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 98 break; 99 case CEED_EVAL_GRAD: 100 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 101 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 102 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 103 ierr = CeedElemRestrictionGetElementSize(r, &P); 104 CeedChk(ierr); 105 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 106 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 107 break; 108 case CEED_EVAL_WEIGHT: // Only on input fields 109 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 110 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 111 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 112 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 113 CeedChk(ierr); 114 115 break; 116 case CEED_EVAL_DIV: 117 break; // Not implimented 118 case CEED_EVAL_CURL: 119 break; // Not implimented 120 } 121 } 122 return 0; 123 } 124 125 //------------------------------------------------------------------------------ 126 // Setup Operator 127 //------------------------------------------------------------------------------ 128 static int CeedOperatorSetup_Opt(CeedOperator op) { 129 int ierr; 130 bool setupdone; 131 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 132 if (setupdone) return 0; 133 Ceed ceed; 134 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 135 Ceed_Opt *ceedimpl; 136 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 137 const CeedInt blksize = ceedimpl->blksize; 138 CeedOperator_Opt *impl; 139 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 140 CeedQFunction qf; 141 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 142 CeedInt Q, numinputfields, numoutputfields; 143 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 144 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 145 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 146 CeedChk(ierr); 147 CeedOperatorField *opinputfields, *opoutputfields; 148 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 149 CeedChk(ierr); 150 CeedQFunctionField *qfinputfields, *qfoutputfields; 151 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 152 CeedChk(ierr); 153 154 // Allocate 155 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 156 CeedChk(ierr); 157 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 158 CeedChk(ierr); 159 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 160 CeedChk(ierr); 161 162 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 163 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 164 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 165 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 166 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 167 168 impl->numein = numinputfields; impl->numeout = numoutputfields; 169 170 // Set up infield and outfield pointer arrays 171 // Infields 172 ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr, 173 impl->evecs, impl->evecsin, 174 impl->qvecsin, 0, 175 numinputfields, Q); 176 CeedChk(ierr); 177 // Outfields 178 ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr, 179 impl->evecs, impl->evecsout, 180 impl->qvecsout, numinputfields, 181 numoutputfields, Q); 182 CeedChk(ierr); 183 184 // Identity QFunctions 185 if (impl->identityqf) { 186 CeedEvalMode inmode, outmode; 187 CeedQFunctionField *infields, *outfields; 188 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 189 190 for (CeedInt i=0; i<numinputfields; i++) { 191 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 192 CeedChk(ierr); 193 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 194 CeedChk(ierr); 195 196 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 197 impl->qvecsout[i] = impl->qvecsin[i]; 198 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 199 } 200 } 201 202 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 203 204 return 0; 205 } 206 207 //------------------------------------------------------------------------------ 208 // Setup Input Fields 209 //------------------------------------------------------------------------------ 210 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields, 211 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 212 CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) { 213 CeedInt ierr; 214 CeedEvalMode emode; 215 CeedVector vec; 216 uint64_t state; 217 218 for (CeedInt i=0; i<numinputfields; i++) { 219 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 220 CeedChk(ierr); 221 if (emode == CEED_EVAL_WEIGHT) { // Skip 222 } else { 223 // Get input vector 224 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 225 if (vec != CEED_VECTOR_ACTIVE) { 226 // Restrict 227 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 228 if (state != impl->inputstate[i]) { 229 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 230 vec, impl->evecs[i], request); 231 CeedChk(ierr); 232 impl->inputstate[i] = state; 233 } 234 } else { 235 // Set Qvec for CEED_EVAL_NONE 236 if (emode == CEED_EVAL_NONE) { 237 ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST, 238 &impl->edata[i]); CeedChk(ierr); 239 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 240 CEED_USE_POINTER, 241 impl->edata[i]); CeedChk(ierr); 242 ierr = CeedVectorRestoreArray(impl->evecsin[i], 243 &impl->edata[i]); CeedChk(ierr); 244 } 245 } 246 // Get evec 247 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 248 (const CeedScalar **) &impl->edata[i]); 249 CeedChk(ierr); 250 } 251 } 252 return 0; 253 } 254 255 //------------------------------------------------------------------------------ 256 // Input Basis Action 257 //------------------------------------------------------------------------------ 258 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 259 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 260 CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive, 261 CeedOperator_Opt *impl, CeedRequest *request) { 262 CeedInt ierr; 263 CeedInt dim, elemsize, size; 264 CeedElemRestriction Erestrict; 265 CeedEvalMode emode; 266 CeedBasis basis; 267 CeedVector vec; 268 269 for (CeedInt i=0; i<numinputfields; i++) { 270 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 271 // Skip active input 272 if (skipactive) { 273 if (vec == CEED_VECTOR_ACTIVE) 274 continue; 275 } 276 277 CeedInt activein = 0; 278 // Get elemsize, emode, size 279 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 280 CeedChk(ierr); 281 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 282 CeedChk(ierr); 283 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 284 CeedChk(ierr); 285 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 286 // Restrict block active input 287 if (vec == CEED_VECTOR_ACTIVE) { 288 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize, 289 CEED_NOTRANSPOSE, invec, 290 impl->evecsin[i], request); 291 CeedChk(ierr); 292 activein = 1; 293 } 294 // Basis action 295 switch(emode) { 296 case CEED_EVAL_NONE: 297 if (!activein) { 298 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 299 CEED_USE_POINTER, 300 &impl->edata[i][e*Q*size]); CeedChk(ierr); 301 } 302 break; 303 case CEED_EVAL_INTERP: 304 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 305 CeedChk(ierr); 306 if (!activein) { 307 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 308 CEED_USE_POINTER, 309 &impl->edata[i][e*elemsize*size]); 310 CeedChk(ierr); 311 } 312 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 313 CEED_EVAL_INTERP, impl->evecsin[i], 314 impl->qvecsin[i]); CeedChk(ierr); 315 break; 316 case CEED_EVAL_GRAD: 317 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 318 CeedChk(ierr); 319 if (!activein) { 320 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 321 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 322 CEED_USE_POINTER, 323 &impl->edata[i][e*elemsize*size/dim]); 324 CeedChk(ierr); 325 } 326 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 327 CEED_EVAL_GRAD, impl->evecsin[i], 328 impl->qvecsin[i]); CeedChk(ierr); 329 break; 330 case CEED_EVAL_WEIGHT: 331 break; // No action 332 // LCOV_EXCL_START 333 case CEED_EVAL_DIV: 334 case CEED_EVAL_CURL: { 335 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 336 CeedChk(ierr); 337 Ceed ceed; 338 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 339 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 340 // LCOV_EXCL_STOP 341 } 342 } 343 } 344 return 0; 345 } 346 347 //------------------------------------------------------------------------------ 348 // Output Basis Action 349 //------------------------------------------------------------------------------ 350 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 351 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 352 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 353 CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl, 354 CeedRequest *request) { 355 CeedInt ierr; 356 CeedElemRestriction Erestrict; 357 CeedEvalMode emode; 358 CeedBasis basis; 359 CeedVector vec; 360 361 for (CeedInt i=0; i<numoutputfields; i++) { 362 // Get elemsize, emode, size 363 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 364 CeedChk(ierr); 365 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 366 CeedChk(ierr); 367 // Basis action 368 switch(emode) { 369 case CEED_EVAL_NONE: 370 break; // No action 371 case CEED_EVAL_INTERP: 372 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 373 CeedChk(ierr); 374 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 375 CEED_EVAL_INTERP, impl->qvecsout[i], 376 impl->evecsout[i]); CeedChk(ierr); 377 break; 378 case CEED_EVAL_GRAD: 379 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 380 CeedChk(ierr); 381 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 382 CEED_EVAL_GRAD, impl->qvecsout[i], 383 impl->evecsout[i]); CeedChk(ierr); 384 break; 385 // LCOV_EXCL_START 386 case CEED_EVAL_WEIGHT: { 387 Ceed ceed; 388 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 389 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 390 "evaluation mode"); 391 } 392 case CEED_EVAL_DIV: 393 case CEED_EVAL_CURL: { 394 Ceed ceed; 395 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 396 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 397 // LCOV_EXCL_STOP 398 } 399 } 400 // Restrict output block 401 // Get output vector 402 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 403 if (vec == CEED_VECTOR_ACTIVE) 404 vec = outvec; 405 // Restrict 406 ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein], 407 e/blksize, CEED_TRANSPOSE, 408 impl->evecsout[i], vec, request); 409 CeedChk(ierr); 410 } 411 return 0; 412 } 413 414 //------------------------------------------------------------------------------ 415 // Restore Input Vectors 416 //------------------------------------------------------------------------------ 417 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields, 418 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 419 bool skipactive, CeedOperator_Opt *impl) { 420 CeedInt ierr; 421 CeedEvalMode emode; 422 423 for (CeedInt i=0; i<numinputfields; i++) { 424 // Skip active inputs 425 if (skipactive) { 426 CeedVector vec; 427 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 428 if (vec == CEED_VECTOR_ACTIVE) 429 continue; 430 } 431 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 432 CeedChk(ierr); 433 if (emode == CEED_EVAL_WEIGHT) { // Skip 434 } else { 435 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 436 (const CeedScalar **) &impl->edata[i]); 437 CeedChk(ierr); 438 } 439 } 440 return 0; 441 } 442 443 //------------------------------------------------------------------------------ 444 // Operator Apply 445 //------------------------------------------------------------------------------ 446 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec, 447 CeedVector outvec, CeedRequest *request) { 448 int ierr; 449 Ceed ceed; 450 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 451 Ceed_Opt *ceedimpl; 452 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 453 CeedInt blksize = ceedimpl->blksize; 454 CeedOperator_Opt *impl; 455 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 456 CeedInt Q, numinputfields, numoutputfields, numelements; 457 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 458 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 459 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 460 CeedQFunction qf; 461 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 462 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 463 CeedChk(ierr); 464 CeedOperatorField *opinputfields, *opoutputfields; 465 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 466 CeedChk(ierr); 467 CeedQFunctionField *qfinputfields, *qfoutputfields; 468 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 469 CeedChk(ierr); 470 CeedEvalMode emode; 471 472 // Setup 473 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 474 475 // Input Evecs and Restriction 476 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 477 opinputfields, invec, impl, request); 478 CeedChk(ierr); 479 480 // Output Lvecs, Evecs, and Qvecs 481 for (CeedInt i=0; i<numoutputfields; i++) { 482 // Set Qvec if needed 483 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 484 CeedChk(ierr); 485 if (emode == CEED_EVAL_NONE) { 486 // Set qvec to single block evec 487 ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST, 488 &impl->edata[i + numinputfields]); 489 CeedChk(ierr); 490 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 491 CEED_USE_POINTER, 492 impl->edata[i + numinputfields]); CeedChk(ierr); 493 ierr = CeedVectorRestoreArray(impl->evecsout[i], 494 &impl->edata[i + numinputfields]); 495 CeedChk(ierr); 496 } 497 } 498 499 // Loop through elements 500 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 501 // Input basis apply 502 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 503 numinputfields, blksize, invec, false, 504 impl, request); CeedChk(ierr); 505 506 // Q function 507 if (!impl->identityqf) { 508 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 509 CeedChk(ierr); 510 } 511 512 // Output basis apply and restrict 513 ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields, 514 blksize, numinputfields, numoutputfields, 515 op, outvec, impl, request); 516 CeedChk(ierr); 517 } 518 519 // Restore input arrays 520 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 521 opinputfields, false, impl); 522 CeedChk(ierr); 523 524 return 0; 525 } 526 527 //------------------------------------------------------------------------------ 528 // Assemble Linear QFunction 529 //------------------------------------------------------------------------------ 530 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op, 531 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 532 int ierr; 533 Ceed ceed; 534 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 535 Ceed_Opt *ceedimpl; 536 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 537 const CeedInt blksize = ceedimpl->blksize; 538 CeedOperator_Opt *impl; 539 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 540 CeedInt Q, numinputfields, numoutputfields, numelements, size; 541 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 542 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 543 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 544 CeedQFunction qf; 545 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 546 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 547 CeedChk(ierr); 548 CeedOperatorField *opinputfields, *opoutputfields; 549 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 550 CeedChk(ierr); 551 CeedQFunctionField *qfinputfields, *qfoutputfields; 552 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 553 CeedChk(ierr); 554 CeedVector vec, lvec; 555 CeedInt numactivein = 0, numactiveout = 0; 556 CeedVector *activein = NULL; 557 CeedScalar *a, *tmp; 558 559 // Setup 560 ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr); 561 562 // Check for identity 563 if (impl->identityqf) 564 // LCOV_EXCL_START 565 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 566 // LCOV_EXCL_STOP 567 568 // Input Evecs and Restriction 569 ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields, 570 opinputfields, NULL, impl, request); 571 CeedChk(ierr); 572 573 // Count number of active input fields 574 for (CeedInt i=0; i<numinputfields; i++) { 575 // Get input vector 576 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 577 // Check if active input 578 if (vec == CEED_VECTOR_ACTIVE) { 579 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 580 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 581 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 582 CeedChk(ierr); 583 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 584 for (CeedInt field=0; field<size; field++) { 585 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 586 CeedChk(ierr); 587 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 588 CEED_USE_POINTER, &tmp[field*Q*blksize]); 589 CeedChk(ierr); 590 } 591 numactivein += size; 592 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 593 } 594 } 595 596 // Count number of active output fields 597 for (CeedInt i=0; i<numoutputfields; i++) { 598 // Get output vector 599 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 600 // Check if active output 601 if (vec == CEED_VECTOR_ACTIVE) { 602 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 603 numactiveout += size; 604 } 605 } 606 607 // Check sizes 608 if (!numactivein || !numactiveout) 609 // LCOV_EXCL_START 610 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 611 "and outputs"); 612 // LCOV_EXCL_STOP 613 614 // Setup lvec 615 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 616 &lvec); CeedChk(ierr); 617 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 618 619 // Create output restriction 620 CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q}; 621 ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q, 622 numactivein*numactiveout, 623 numactivein*numactiveout*numelements*Q, 624 strides, rstr); CeedChk(ierr); 625 // Create assembled vector 626 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 627 assembled); CeedChk(ierr); 628 629 // Loop through elements 630 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 631 // Input basis apply 632 ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields, 633 numinputfields, blksize, NULL, true, 634 impl, request); CeedChk(ierr); 635 636 // Assemble QFunction 637 for (CeedInt in=0; in<numactivein; in++) { 638 // Set Inputs 639 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 640 if (numactivein > 1) { 641 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 642 0.0); CeedChk(ierr); 643 } 644 // Set Outputs 645 for (CeedInt out=0; out<numoutputfields; out++) { 646 // Get output vector 647 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 648 CeedChk(ierr); 649 // Check if active output 650 if (vec == CEED_VECTOR_ACTIVE) { 651 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 652 CEED_USE_POINTER, a); CeedChk(ierr); 653 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 654 CeedChk(ierr); 655 a += size*Q*blksize; // Advance the pointer by the size of the output 656 } 657 } 658 // Apply QFunction 659 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 660 CeedChk(ierr); 661 } 662 } 663 664 // Un-set output Qvecs to prevent accidental overwrite of Assembled 665 for (CeedInt out=0; out<numoutputfields; out++) { 666 // Get output vector 667 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 668 CeedChk(ierr); 669 // Check if active output 670 if (vec == CEED_VECTOR_ACTIVE) { 671 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 672 NULL); CeedChk(ierr); 673 } 674 } 675 676 // Restore input arrays 677 ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields, 678 opinputfields, true, impl); 679 CeedChk(ierr); 680 681 // Output blocked restriction 682 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 683 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 684 CeedElemRestriction blkrstr; 685 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize, 686 numactivein*numactiveout, numactivein*numactiveout*numelements*Q, 687 strides, &blkrstr); CeedChk(ierr); 688 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled, 689 request); CeedChk(ierr); 690 691 // Cleanup 692 for (CeedInt i=0; i<numactivein; i++) { 693 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 694 } 695 ierr = CeedFree(&activein); CeedChk(ierr); 696 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 697 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 698 699 return 0; 700 } 701 702 //------------------------------------------------------------------------------ 703 // Operator Destroy 704 //------------------------------------------------------------------------------ 705 static int CeedOperatorDestroy_Opt(CeedOperator op) { 706 int ierr; 707 CeedOperator_Opt *impl; 708 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 709 710 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 711 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 712 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 713 } 714 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 715 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 716 ierr = CeedFree(&impl->edata); CeedChk(ierr); 717 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 718 719 for (CeedInt i=0; i<impl->numein; i++) { 720 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 721 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 722 } 723 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 724 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 725 726 for (CeedInt i=0; i<impl->numeout; i++) { 727 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 728 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 729 } 730 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 731 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 732 733 ierr = CeedFree(&impl); CeedChk(ierr); 734 return 0; 735 } 736 737 //------------------------------------------------------------------------------ 738 // Operator Create 739 //------------------------------------------------------------------------------ 740 int CeedOperatorCreate_Opt(CeedOperator op) { 741 int ierr; 742 Ceed ceed; 743 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 744 Ceed_Opt *ceedimpl; 745 ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr); 746 CeedInt blksize = ceedimpl->blksize; 747 CeedOperator_Opt *impl; 748 749 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 750 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 751 752 if (blksize != 1 && blksize != 8) 753 // LCOV_EXCL_START 754 return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize); 755 // LCOV_EXCL_STOP 756 757 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 758 CeedOperatorAssembleLinearQFunction_Opt); 759 CeedChk(ierr); 760 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 761 CeedOperatorApply_Opt); CeedChk(ierr); 762 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 763 CeedOperatorDestroy_Opt); CeedChk(ierr); 764 return 0; 765 } 766 //------------------------------------------------------------------------------ 767