1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed.h> 18 #include <ceed-backend.h> 19 #include <stdbool.h> 20 #include <stddef.h> 21 #include <stdint.h> 22 #include "ceed-blocked.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 28 CeedOperator op, bool inOrOut, 29 CeedElemRestriction *blkrestr, 30 CeedVector *fullevecs, CeedVector *evecs, 31 CeedVector *qvecs, CeedInt starte, 32 CeedInt numfields, CeedInt Q) { 33 CeedInt dim, ierr, ncomp, size, P; 34 Ceed ceed; 35 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 36 CeedBasis basis; 37 CeedElemRestriction r; 38 CeedOperatorField *opfields; 39 CeedQFunctionField *qffields; 40 if (inOrOut) { 41 ierr = CeedOperatorGetFields(op, NULL, &opfields); 42 CeedChkBackend(ierr); 43 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 44 CeedChkBackend(ierr); 45 } else { 46 ierr = CeedOperatorGetFields(op, &opfields, NULL); 47 CeedChkBackend(ierr); 48 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 49 CeedChkBackend(ierr); 50 } 51 const CeedInt blksize = 8; 52 53 // Loop over fields 54 for (CeedInt i=0; i<numfields; i++) { 55 CeedEvalMode emode; 56 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChkBackend(ierr); 57 58 if (emode != CEED_EVAL_WEIGHT) { 59 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 60 CeedChkBackend(ierr); 61 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 62 CeedInt nelem, elemsize, lsize, compstride; 63 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChkBackend(ierr); 64 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChkBackend(ierr); 65 ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChkBackend(ierr); 66 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChkBackend(ierr); 67 68 bool strided; 69 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 70 if (strided) { 71 CeedInt strides[3]; 72 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 73 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize, 74 blksize, ncomp, lsize, strides, &blkrestr[i+starte]); 75 CeedChkBackend(ierr); 76 } else { 77 const CeedInt *offsets = NULL; 78 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 79 CeedChkBackend(ierr); 80 ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChkBackend(ierr); 81 ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize, 82 blksize, ncomp, compstride, 83 lsize, CEED_MEM_HOST, 84 CEED_COPY_VALUES, offsets, 85 &blkrestr[i+starte]); 86 CeedChkBackend(ierr); 87 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 88 } 89 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 90 &fullevecs[i+starte]); 91 CeedChkBackend(ierr); 92 } 93 94 switch(emode) { 95 case CEED_EVAL_NONE: 96 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr); 97 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChkBackend(ierr); 98 break; 99 case CEED_EVAL_INTERP: 100 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr); 101 ierr = CeedElemRestrictionGetElementSize(r, &P); 102 CeedChkBackend(ierr); 103 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChkBackend(ierr); 104 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChkBackend(ierr); 105 break; 106 case CEED_EVAL_GRAD: 107 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChkBackend(ierr); 108 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr); 109 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 110 ierr = CeedElemRestrictionGetElementSize(r, &P); 111 CeedChkBackend(ierr); 112 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); 113 CeedChkBackend(ierr); 114 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChkBackend(ierr); 115 break; 116 case CEED_EVAL_WEIGHT: // Only on input fields 117 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChkBackend(ierr); 118 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChkBackend(ierr); 119 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 120 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 121 CeedChkBackend(ierr); 122 123 break; 124 case CEED_EVAL_DIV: 125 break; // Not implemented 126 case CEED_EVAL_CURL: 127 break; // Not implemented 128 } 129 } 130 return CEED_ERROR_SUCCESS; 131 } 132 133 //------------------------------------------------------------------------------ 134 // Setup Operator 135 //------------------------------------------------------------------------------ 136 static int CeedOperatorSetup_Blocked(CeedOperator op) { 137 int ierr; 138 bool setupdone; 139 ierr = CeedOperatorIsSetupDone(op, &setupdone); CeedChkBackend(ierr); 140 if (setupdone) return CEED_ERROR_SUCCESS; 141 Ceed ceed; 142 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 143 CeedOperator_Blocked *impl; 144 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 145 CeedQFunction qf; 146 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 147 CeedInt Q, numinputfields, numoutputfields; 148 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 149 ierr = CeedQFunctionIsIdentity(qf, &impl->identityqf); CeedChkBackend(ierr); 150 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 151 CeedChkBackend(ierr); 152 CeedOperatorField *opinputfields, *opoutputfields; 153 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 154 CeedChkBackend(ierr); 155 CeedQFunctionField *qfinputfields, *qfoutputfields; 156 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 157 CeedChkBackend(ierr); 158 159 // Allocate 160 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 161 CeedChkBackend(ierr); 162 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 163 CeedChkBackend(ierr); 164 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 165 CeedChkBackend(ierr); 166 167 ierr = CeedCalloc(16, &impl->inputstate); CeedChkBackend(ierr); 168 ierr = CeedCalloc(16, &impl->evecsin); CeedChkBackend(ierr); 169 ierr = CeedCalloc(16, &impl->evecsout); CeedChkBackend(ierr); 170 ierr = CeedCalloc(16, &impl->qvecsin); CeedChkBackend(ierr); 171 ierr = CeedCalloc(16, &impl->qvecsout); CeedChkBackend(ierr); 172 173 impl->numein = numinputfields; impl->numeout = numoutputfields; 174 175 // Set up infield and outfield pointer arrays 176 // Infields 177 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 178 impl->evecs, impl->evecsin, 179 impl->qvecsin, 0, 180 numinputfields, Q); 181 CeedChkBackend(ierr); 182 // Outfields 183 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 184 impl->evecs, impl->evecsout, 185 impl->qvecsout, numinputfields, 186 numoutputfields, Q); 187 CeedChkBackend(ierr); 188 189 // Identity QFunctions 190 if (impl->identityqf) { 191 CeedEvalMode inmode, outmode; 192 CeedQFunctionField *infields, *outfields; 193 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChkBackend(ierr); 194 195 for (CeedInt i=0; i<numinputfields; i++) { 196 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 197 CeedChkBackend(ierr); 198 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 199 CeedChkBackend(ierr); 200 201 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChkBackend(ierr); 202 impl->qvecsout[i] = impl->qvecsin[i]; 203 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChkBackend(ierr); 204 } 205 } 206 207 ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 208 209 return CEED_ERROR_SUCCESS; 210 } 211 212 //------------------------------------------------------------------------------ 213 // Setup Operator Inputs 214 //------------------------------------------------------------------------------ 215 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 216 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 217 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 218 CeedRequest *request) { 219 CeedInt ierr; 220 CeedEvalMode emode; 221 CeedVector vec; 222 uint64_t state; 223 224 for (CeedInt i=0; i<numinputfields; i++) { 225 // Get input vector 226 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 227 if (vec == CEED_VECTOR_ACTIVE) { 228 if (skipactive) 229 continue; 230 else 231 vec = invec; 232 } 233 234 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 235 CeedChkBackend(ierr); 236 if (emode == CEED_EVAL_WEIGHT) { // Skip 237 } else { 238 // Restrict 239 ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 240 if (state != impl->inputstate[i] || vec == invec) { 241 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 242 vec, impl->evecs[i], request); 243 CeedChkBackend(ierr); 244 impl->inputstate[i] = state; 245 } 246 // Get evec 247 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 248 (const CeedScalar **) &impl->edata[i]); 249 CeedChkBackend(ierr); 250 } 251 } 252 return CEED_ERROR_SUCCESS; 253 } 254 255 //------------------------------------------------------------------------------ 256 // Input Basis Action 257 //------------------------------------------------------------------------------ 258 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 259 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 260 CeedInt numinputfields, CeedInt blksize, bool skipactive, 261 CeedOperator_Blocked *impl) { 262 CeedInt ierr; 263 CeedInt dim, elemsize, size; 264 CeedElemRestriction Erestrict; 265 CeedEvalMode emode; 266 CeedBasis basis; 267 268 for (CeedInt i=0; i<numinputfields; i++) { 269 // Skip active input 270 if (skipactive) { 271 CeedVector vec; 272 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 273 if (vec == CEED_VECTOR_ACTIVE) 274 continue; 275 } 276 277 // Get elemsize, emode, size 278 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 279 CeedChkBackend(ierr); 280 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 281 CeedChkBackend(ierr); 282 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 283 CeedChkBackend(ierr); 284 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChkBackend(ierr); 285 // Basis action 286 switch(emode) { 287 case CEED_EVAL_NONE: 288 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 289 CEED_USE_POINTER, 290 &impl->edata[i][e*Q*size]); CeedChkBackend(ierr); 291 break; 292 case CEED_EVAL_INTERP: 293 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 294 CeedChkBackend(ierr); 295 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 296 CEED_USE_POINTER, 297 &impl->edata[i][e*elemsize*size]); 298 CeedChkBackend(ierr); 299 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 300 CEED_EVAL_INTERP, impl->evecsin[i], 301 impl->qvecsin[i]); CeedChkBackend(ierr); 302 break; 303 case CEED_EVAL_GRAD: 304 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 305 CeedChkBackend(ierr); 306 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 307 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 308 CEED_USE_POINTER, 309 &impl->edata[i][e*elemsize*size/dim]); 310 CeedChkBackend(ierr); 311 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 312 CEED_EVAL_GRAD, impl->evecsin[i], 313 impl->qvecsin[i]); CeedChkBackend(ierr); 314 break; 315 case CEED_EVAL_WEIGHT: 316 break; // No action 317 // LCOV_EXCL_START 318 case CEED_EVAL_DIV: 319 case CEED_EVAL_CURL: { 320 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 321 CeedChkBackend(ierr); 322 Ceed ceed; 323 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 324 return CeedError(ceed, CEED_ERROR_BACKEND, 325 "Ceed evaluation mode not implemented"); 326 // LCOV_EXCL_STOP 327 } 328 } 329 } 330 return CEED_ERROR_SUCCESS; 331 } 332 333 //------------------------------------------------------------------------------ 334 // Output Basis Action 335 //------------------------------------------------------------------------------ 336 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 337 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 338 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 339 CeedOperator op, CeedOperator_Blocked *impl) { 340 CeedInt ierr; 341 CeedInt dim, elemsize, size; 342 CeedElemRestriction Erestrict; 343 CeedEvalMode emode; 344 CeedBasis basis; 345 346 for (CeedInt i=0; i<numoutputfields; i++) { 347 // Get elemsize, emode, size 348 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 349 CeedChkBackend(ierr); 350 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 351 CeedChkBackend(ierr); 352 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 353 CeedChkBackend(ierr); 354 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 355 CeedChkBackend(ierr); 356 // Basis action 357 switch(emode) { 358 case CEED_EVAL_NONE: 359 break; // No action 360 case CEED_EVAL_INTERP: 361 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 362 CeedChkBackend(ierr); 363 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 364 CEED_USE_POINTER, 365 &impl->edata[i + numinputfields][e*elemsize*size]); 366 CeedChkBackend(ierr); 367 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 368 CEED_EVAL_INTERP, impl->qvecsout[i], 369 impl->evecsout[i]); CeedChkBackend(ierr); 370 break; 371 case CEED_EVAL_GRAD: 372 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 373 CeedChkBackend(ierr); 374 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 375 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 376 CEED_USE_POINTER, 377 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 378 CeedChkBackend(ierr); 379 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 380 CEED_EVAL_GRAD, impl->qvecsout[i], 381 impl->evecsout[i]); CeedChkBackend(ierr); 382 break; 383 // LCOV_EXCL_START 384 case CEED_EVAL_WEIGHT: { 385 Ceed ceed; 386 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 387 return CeedError(ceed, CEED_ERROR_BACKEND, 388 "CEED_EVAL_WEIGHT cannot be an output " 389 "evaluation mode"); 390 } 391 case CEED_EVAL_DIV: 392 case CEED_EVAL_CURL: { 393 Ceed ceed; 394 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 395 return CeedError(ceed, CEED_ERROR_BACKEND, 396 "Ceed evaluation mode not implemented"); 397 // LCOV_EXCL_STOP 398 } 399 } 400 } 401 return CEED_ERROR_SUCCESS; 402 } 403 404 //------------------------------------------------------------------------------ 405 // Restore Input Vectors 406 //------------------------------------------------------------------------------ 407 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 408 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 409 bool skipactive, CeedOperator_Blocked *impl) { 410 CeedInt ierr; 411 CeedEvalMode emode; 412 413 for (CeedInt i=0; i<numinputfields; i++) { 414 // Skip active inputs 415 if (skipactive) { 416 CeedVector vec; 417 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 418 if (vec == CEED_VECTOR_ACTIVE) 419 continue; 420 } 421 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 422 CeedChkBackend(ierr); 423 if (emode == CEED_EVAL_WEIGHT) { // Skip 424 } else { 425 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 426 (const CeedScalar **) &impl->edata[i]); 427 CeedChkBackend(ierr); 428 } 429 } 430 return CEED_ERROR_SUCCESS; 431 } 432 433 //------------------------------------------------------------------------------ 434 // Operator Apply 435 //------------------------------------------------------------------------------ 436 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector invec, 437 CeedVector outvec, 438 CeedRequest *request) { 439 int ierr; 440 CeedOperator_Blocked *impl; 441 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 442 const CeedInt blksize = 8; 443 CeedInt Q, numinputfields, numoutputfields, numelements, size; 444 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChkBackend(ierr); 445 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 446 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 447 CeedQFunction qf; 448 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 449 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 450 CeedChkBackend(ierr); 451 CeedOperatorField *opinputfields, *opoutputfields; 452 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 453 CeedChkBackend(ierr); 454 CeedQFunctionField *qfinputfields, *qfoutputfields; 455 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 456 CeedChkBackend(ierr); 457 CeedEvalMode emode; 458 CeedVector vec; 459 460 // Setup 461 ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 462 463 // Input Evecs and Restriction 464 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 465 opinputfields, invec, false, impl, 466 request); CeedChkBackend(ierr); 467 468 // Output Evecs 469 for (CeedInt i=0; i<numoutputfields; i++) { 470 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 471 &impl->edata[i + numinputfields]); CeedChkBackend(ierr); 472 } 473 474 // Loop through elements 475 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 476 // Output pointers 477 for (CeedInt i=0; i<numoutputfields; i++) { 478 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 479 CeedChkBackend(ierr); 480 if (emode == CEED_EVAL_NONE) { 481 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 482 CeedChkBackend(ierr); 483 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 484 CEED_USE_POINTER, 485 &impl->edata[i + numinputfields][e*Q*size]); 486 CeedChkBackend(ierr); 487 } 488 } 489 490 // Input basis apply 491 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 492 numinputfields, blksize, false, impl); 493 CeedChkBackend(ierr); 494 495 // Q function 496 if (!impl->identityqf) { 497 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 498 CeedChkBackend(ierr); 499 } 500 501 // Output basis apply 502 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 503 blksize, numinputfields, 504 numoutputfields, op, impl); 505 CeedChkBackend(ierr); 506 } 507 508 // Output restriction 509 for (CeedInt i=0; i<numoutputfields; i++) { 510 // Restore evec 511 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 512 &impl->edata[i + numinputfields]); CeedChkBackend(ierr); 513 // Get output vector 514 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); 515 CeedChkBackend(ierr); 516 // Active 517 if (vec == CEED_VECTOR_ACTIVE) 518 vec = outvec; 519 // Restrict 520 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], 521 CEED_TRANSPOSE, impl->evecs[i+impl->numein], 522 vec, request); CeedChkBackend(ierr); 523 524 } 525 526 // Restore input arrays 527 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 528 opinputfields, false, impl); CeedChkBackend(ierr); 529 530 return CEED_ERROR_SUCCESS; 531 } 532 533 //------------------------------------------------------------------------------ 534 // Assemble Linear QFunction 535 //------------------------------------------------------------------------------ 536 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op, 537 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 538 int ierr; 539 CeedOperator_Blocked *impl; 540 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 541 const CeedInt blksize = 8; 542 CeedInt Q, numinputfields, numoutputfields, numelements, size; 543 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChkBackend(ierr); 544 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 545 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 546 CeedQFunction qf; 547 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 548 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 549 CeedChkBackend(ierr); 550 CeedOperatorField *opinputfields, *opoutputfields; 551 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 552 CeedChkBackend(ierr); 553 CeedQFunctionField *qfinputfields, *qfoutputfields; 554 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 555 CeedChkBackend(ierr); 556 CeedVector vec, lvec; 557 CeedInt numactivein = 0, numactiveout = 0; 558 CeedVector *activein = NULL; 559 CeedScalar *a, *tmp; 560 Ceed ceed; 561 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 562 563 // Setup 564 ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 565 566 // Check for identity 567 if (impl->identityqf) 568 // LCOV_EXCL_START 569 return CeedError(ceed, CEED_ERROR_BACKEND, 570 "Assembling identity qfunctions not supported"); 571 // LCOV_EXCL_STOP 572 573 // Input Evecs and Restriction 574 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 575 opinputfields, NULL, true, impl, 576 request); CeedChkBackend(ierr); 577 578 // Count number of active input fields 579 for (CeedInt i=0; i<numinputfields; i++) { 580 // Get input vector 581 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 582 // Check if active input 583 if (vec == CEED_VECTOR_ACTIVE) { 584 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChkBackend(ierr); 585 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChkBackend(ierr); 586 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 587 CeedChkBackend(ierr); 588 ierr = CeedRealloc(numactivein + size, &activein); CeedChkBackend(ierr); 589 for (CeedInt field=0; field<size; field++) { 590 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 591 CeedChkBackend(ierr); 592 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 593 CEED_USE_POINTER, &tmp[field*Q*blksize]); 594 CeedChkBackend(ierr); 595 } 596 numactivein += size; 597 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChkBackend(ierr); 598 } 599 } 600 601 // Count number of active output fields 602 for (CeedInt i=0; i<numoutputfields; i++) { 603 // Get output vector 604 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); 605 CeedChkBackend(ierr); 606 // Check if active output 607 if (vec == CEED_VECTOR_ACTIVE) { 608 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 609 CeedChkBackend(ierr); 610 numactiveout += size; 611 } 612 } 613 614 // Check sizes 615 if (!numactivein || !numactiveout) 616 // LCOV_EXCL_START 617 return CeedError(ceed, CEED_ERROR_BACKEND, 618 "Cannot assemble QFunction without active inputs " 619 "and outputs"); 620 // LCOV_EXCL_STOP 621 622 // Setup lvec 623 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 624 &lvec); CeedChkBackend(ierr); 625 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 626 627 // Create output restriction 628 CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q}; 629 ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q, 630 numactivein*numactiveout, 631 numactivein*numactiveout*numelements*Q, 632 strides, rstr); CeedChkBackend(ierr); 633 // Create assembled vector 634 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 635 assembled); CeedChkBackend(ierr); 636 637 // Loop through elements 638 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 639 // Input basis apply 640 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 641 numinputfields, blksize, true, impl); 642 CeedChkBackend(ierr); 643 644 // Assemble QFunction 645 for (CeedInt in=0; in<numactivein; in++) { 646 // Set Inputs 647 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChkBackend(ierr); 648 if (numactivein > 1) { 649 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 650 0.0); CeedChkBackend(ierr); 651 } 652 // Set Outputs 653 for (CeedInt out=0; out<numoutputfields; out++) { 654 // Get output vector 655 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 656 CeedChkBackend(ierr); 657 // Check if active output 658 if (vec == CEED_VECTOR_ACTIVE) { 659 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 660 CEED_USE_POINTER, a); CeedChkBackend(ierr); 661 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 662 CeedChkBackend(ierr); 663 a += size*Q*blksize; // Advance the pointer by the size of the output 664 } 665 } 666 // Apply QFunction 667 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 668 CeedChkBackend(ierr); 669 } 670 } 671 672 // Un-set output Qvecs to prevent accidental overwrite of Assembled 673 for (CeedInt out=0; out<numoutputfields; out++) { 674 // Get output vector 675 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 676 CeedChkBackend(ierr); 677 // Check if active output 678 if (vec == CEED_VECTOR_ACTIVE) { 679 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 680 NULL); CeedChkBackend(ierr); 681 } 682 } 683 684 // Restore input arrays 685 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 686 opinputfields, true, impl); CeedChkBackend(ierr); 687 688 // Output blocked restriction 689 ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr); 690 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 691 CeedElemRestriction blkrstr; 692 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize, 693 numactivein*numactiveout, numactivein*numactiveout*numelements*Q, 694 strides, &blkrstr); CeedChkBackend(ierr); 695 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled, 696 request); CeedChkBackend(ierr); 697 698 // Cleanup 699 for (CeedInt i=0; i<numactivein; i++) { 700 ierr = CeedVectorDestroy(&activein[i]); CeedChkBackend(ierr); 701 } 702 ierr = CeedFree(&activein); CeedChkBackend(ierr); 703 ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr); 704 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChkBackend(ierr); 705 706 return CEED_ERROR_SUCCESS; 707 } 708 709 //------------------------------------------------------------------------------ 710 // Operator Destroy 711 //------------------------------------------------------------------------------ 712 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 713 int ierr; 714 CeedOperator_Blocked *impl; 715 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 716 717 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 718 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChkBackend(ierr); 719 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChkBackend(ierr); 720 } 721 ierr = CeedFree(&impl->blkrestr); CeedChkBackend(ierr); 722 ierr = CeedFree(&impl->evecs); CeedChkBackend(ierr); 723 ierr = CeedFree(&impl->edata); CeedChkBackend(ierr); 724 ierr = CeedFree(&impl->inputstate); CeedChkBackend(ierr); 725 726 for (CeedInt i=0; i<impl->numein; i++) { 727 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChkBackend(ierr); 728 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChkBackend(ierr); 729 } 730 ierr = CeedFree(&impl->evecsin); CeedChkBackend(ierr); 731 ierr = CeedFree(&impl->qvecsin); CeedChkBackend(ierr); 732 733 for (CeedInt i=0; i<impl->numeout; i++) { 734 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChkBackend(ierr); 735 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChkBackend(ierr); 736 } 737 ierr = CeedFree(&impl->evecsout); CeedChkBackend(ierr); 738 ierr = CeedFree(&impl->qvecsout); CeedChkBackend(ierr); 739 740 ierr = CeedFree(&impl); CeedChkBackend(ierr); 741 return CEED_ERROR_SUCCESS; 742 } 743 744 //------------------------------------------------------------------------------ 745 // Operator Create 746 //------------------------------------------------------------------------------ 747 int CeedOperatorCreate_Blocked(CeedOperator op) { 748 int ierr; 749 Ceed ceed; 750 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 751 CeedOperator_Blocked *impl; 752 753 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 754 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 755 756 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 757 CeedOperatorLinearAssembleQFunction_Blocked); 758 CeedChkBackend(ierr); 759 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 760 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr); 761 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 762 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr); 763 return CEED_ERROR_SUCCESS; 764 } 765 //------------------------------------------------------------------------------ 766