// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. // All Rights reserved. See files LICENSE and NOTICE for details. // // This file is part of CEED, a collection of benchmarks, miniapps, software // libraries and APIs for efficient high-order finite element and spectral // element discretizations for exascale applications. For more information and // source code availability see http://github.com/ceed. // // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, // a collaborative effort of two U.S. Department of Energy organizations (Office // of Science and the National Nuclear Security Administration) responsible for // the planning and preparation of a capable exascale ecosystem, including // software, applications, hardware, advanced system engineering and early // testbed platforms, in support of the nation's exascale computing imperative. #include "ceed-blocked.h" //------------------------------------------------------------------------------ // Setup Input/Output Fields //------------------------------------------------------------------------------ static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, CeedOperator op, bool inOrOut, CeedElemRestriction *blkrestr, CeedVector *fullevecs, CeedVector *evecs, CeedVector *qvecs, CeedInt starte, CeedInt numfields, CeedInt Q) { CeedInt dim, ierr, ncomp, size, P; Ceed ceed; ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); CeedBasis basis; CeedElemRestriction r; CeedOperatorField *opfields; CeedQFunctionField *qffields; if (inOrOut) { ierr = CeedOperatorGetFields(op, NULL, &opfields); CeedChk(ierr); ierr = CeedQFunctionGetFields(qf, NULL, &qffields); CeedChk(ierr); } else { ierr = CeedOperatorGetFields(op, &opfields, NULL); CeedChk(ierr); ierr = CeedQFunctionGetFields(qf, &qffields, NULL); CeedChk(ierr); } const CeedInt blksize = 8; // Loop over fields for (CeedInt i=0; iidentityqf); CeedChk(ierr); ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); CeedChk(ierr); CeedOperatorField *opinputfields, *opoutputfields; ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); CeedChk(ierr); CeedQFunctionField *qfinputfields, *qfoutputfields; ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); CeedChk(ierr); // Allocate ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); CeedChk(ierr); ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); CeedChk(ierr); ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); CeedChk(ierr); ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); impl->numein = numinputfields; impl->numeout = numoutputfields; // Set up infield and outfield pointer arrays // Infields ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, impl->evecs, impl->evecsin, impl->qvecsin, 0, numinputfields, Q); CeedChk(ierr); // Outfields ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, impl->evecs, impl->evecsout, impl->qvecsout, numinputfields, numoutputfields, Q); CeedChk(ierr); // Identity QFunctions if (impl->identityqf) { CeedEvalMode inmode, outmode; CeedQFunctionField *infields, *outfields; ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); for (CeedInt i=0; iqvecsout[i]); CeedChk(ierr); impl->qvecsout[i] = impl->qvecsin[i]; ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); } } ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); return 0; } //------------------------------------------------------------------------------ // Setup Operator Inputs //------------------------------------------------------------------------------ static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, CeedRequest *request) { CeedInt ierr; CeedEvalMode emode; CeedVector vec; uint64_t state; for (CeedInt i=0; iinputstate[i] || vec == invec) { ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, vec, impl->evecs[i], request); CeedChk(ierr); impl->inputstate[i] = state; } // Get evec ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, (const CeedScalar **) &impl->edata[i]); CeedChk(ierr); } } return 0; } //------------------------------------------------------------------------------ // Input Basis Action //------------------------------------------------------------------------------ static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, CeedInt numinputfields, CeedInt blksize, bool skipactive, CeedOperator_Blocked *impl) { CeedInt ierr; CeedInt dim, elemsize, size; CeedElemRestriction Erestrict; CeedEvalMode emode; CeedBasis basis; for (CeedInt i=0; iqvecsin[i], CEED_MEM_HOST, CEED_USE_POINTER, &impl->edata[i][e*Q*size]); CeedChk(ierr); break; case CEED_EVAL_INTERP: ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, CEED_USE_POINTER, &impl->edata[i][e*elemsize*size]); CeedChk(ierr); ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, impl->evecsin[i], impl->qvecsin[i]); CeedChk(ierr); break; case CEED_EVAL_GRAD: ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, CEED_USE_POINTER, &impl->edata[i][e*elemsize*size/dim]); CeedChk(ierr); ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, impl->evecsin[i], impl->qvecsin[i]); CeedChk(ierr); break; case CEED_EVAL_WEIGHT: break; // No action // LCOV_EXCL_START case CEED_EVAL_DIV: case CEED_EVAL_CURL: { ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); Ceed ceed; ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); // LCOV_EXCL_STOP } } } return 0; } //------------------------------------------------------------------------------ // Output Basis Action //------------------------------------------------------------------------------ static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, CeedOperator op, CeedOperator_Blocked *impl) { CeedInt ierr; CeedInt dim, elemsize, size; CeedElemRestriction Erestrict; CeedEvalMode emode; CeedBasis basis; for (CeedInt i=0; ievecsout[i], CEED_MEM_HOST, CEED_USE_POINTER, &impl->edata[i + numinputfields][e*elemsize*size]); CeedChk(ierr); ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, CEED_EVAL_INTERP, impl->qvecsout[i], impl->evecsout[i]); CeedChk(ierr); break; case CEED_EVAL_GRAD: ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); CeedChk(ierr); ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, CEED_USE_POINTER, &impl->edata[i + numinputfields][e*elemsize*size/dim]); CeedChk(ierr); ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, CEED_EVAL_GRAD, impl->qvecsout[i], impl->evecsout[i]); CeedChk(ierr); break; // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { Ceed ceed; ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " "evaluation mode"); } case CEED_EVAL_DIV: case CEED_EVAL_CURL: { Ceed ceed; ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); // LCOV_EXCL_STOP } } } return 0; } //------------------------------------------------------------------------------ // Restore Input Vectors //------------------------------------------------------------------------------ static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, bool skipactive, CeedOperator_Blocked *impl) { CeedInt ierr; CeedEvalMode emode; for (CeedInt i=0; ievecs[i], (const CeedScalar **) &impl->edata[i]); CeedChk(ierr); } } return 0; } //------------------------------------------------------------------------------ // Operator Apply //------------------------------------------------------------------------------ static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, CeedVector outvec, CeedRequest *request) { int ierr; CeedOperator_Blocked *impl; ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); const CeedInt blksize = 8; CeedInt Q, numinputfields, numoutputfields, numelements, size; ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); CeedQFunction qf; ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); CeedChk(ierr); CeedOperatorField *opinputfields, *opoutputfields; ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); CeedChk(ierr); CeedQFunctionField *qfinputfields, *qfoutputfields; ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); CeedChk(ierr); CeedEvalMode emode; CeedVector vec; // Setup ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); // Input Evecs and Restriction ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, opinputfields, invec, false, impl, request); CeedChk(ierr); // Output Evecs for (CeedInt i=0; ievecs[i+impl->numein], CEED_MEM_HOST, &impl->edata[i + numinputfields]); CeedChk(ierr); } // Loop through elements for (CeedInt e=0; eqvecsout[i], CEED_MEM_HOST, CEED_USE_POINTER, &impl->edata[i + numinputfields][e*Q*size]); CeedChk(ierr); } } // Input basis apply ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, numinputfields, blksize, false, impl); CeedChk(ierr); // Q function if (!impl->identityqf) { ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); CeedChk(ierr); } // Output basis apply ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, blksize, numinputfields, numoutputfields, op, impl); CeedChk(ierr); } // Output restriction for (CeedInt i=0; ievecs[i+impl->numein], &impl->edata[i + numinputfields]); CeedChk(ierr); // Get output vector ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); // Active if (vec == CEED_VECTOR_ACTIVE) vec = outvec; // Restrict ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE, impl->evecs[i+impl->numein], vec, request); CeedChk(ierr); } // Restore input arrays ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, opinputfields, false, impl); CeedChk(ierr); return 0; } //------------------------------------------------------------------------------ // Assemble Linear QFunction //------------------------------------------------------------------------------ static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { int ierr; CeedOperator_Blocked *impl; ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); const CeedInt blksize = 8; CeedInt Q, numinputfields, numoutputfields, numelements, size; ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); CeedQFunction qf; ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); CeedChk(ierr); CeedOperatorField *opinputfields, *opoutputfields; ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); CeedChk(ierr); CeedQFunctionField *qfinputfields, *qfoutputfields; ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); CeedChk(ierr); CeedVector vec, lvec; CeedInt numactivein = 0, numactiveout = 0; CeedVector *activein = NULL; CeedScalar *a, *tmp; Ceed ceed; ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); // Setup ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); // Check for identity if (impl->identityqf) // LCOV_EXCL_START return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); // LCOV_EXCL_STOP // Input Evecs and Restriction ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, opinputfields, NULL, true, impl, request); CeedChk(ierr); // Count number of active input fields for (CeedInt i=0; iqvecsin[i], 0.0); CeedChk(ierr); ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); CeedChk(ierr); ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); for (CeedInt field=0; fieldqvecsin[i], &tmp); CeedChk(ierr); } } // Count number of active output fields for (CeedInt i=0; i 1) { ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 0.0); CeedChk(ierr); } // Set Outputs for (CeedInt out=0; outqvecsout[out], CEED_MEM_HOST, CEED_USE_POINTER, a); CeedChk(ierr); ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); CeedChk(ierr); a += size*Q*blksize; // Advance the pointer by the size of the output } } // Apply QFunction ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); CeedChk(ierr); } } // Un-set output Qvecs to prevent accidental overwrite of Assembled for (CeedInt out=0; outqvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, NULL); CeedChk(ierr); } } // Restore input arrays ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, opinputfields, true, impl); CeedChk(ierr); // Output blocked restriction ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); CeedElemRestriction blkrstr; ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize, numactivein*numactiveout, numactivein*numactiveout*numelements*Q, strides, &blkrstr); CeedChk(ierr); ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled, request); CeedChk(ierr); // Cleanup for (CeedInt i=0; inumein+impl->numeout; i++) { ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); } ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); ierr = CeedFree(&impl->evecs); CeedChk(ierr); ierr = CeedFree(&impl->edata); CeedChk(ierr); ierr = CeedFree(&impl->inputstate); CeedChk(ierr); for (CeedInt i=0; inumein; i++) { ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); } ierr = CeedFree(&impl->evecsin); CeedChk(ierr); ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); for (CeedInt i=0; inumeout; i++) { ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); } ierr = CeedFree(&impl->evecsout); CeedChk(ierr); ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); ierr = CeedFree(&impl); CeedChk(ierr); return 0; } //------------------------------------------------------------------------------ // Operator Create //------------------------------------------------------------------------------ int CeedOperatorCreate_Blocked(CeedOperator op) { int ierr; Ceed ceed; ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); CeedOperator_Blocked *impl; ierr = CeedCalloc(1, &impl); CeedChk(ierr); ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", CeedOperatorAssembleLinearQFunction_Blocked); CeedChk(ierr); ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", CeedOperatorApply_Blocked); CeedChk(ierr); ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", CeedOperatorDestroy_Blocked); CeedChk(ierr); return 0; } //------------------------------------------------------------------------------