107b31e0eSJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 207b31e0eSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 307b31e0eSJeremy L Thompson // 407b31e0eSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 507b31e0eSJeremy L Thompson // 607b31e0eSJeremy L Thompson // This file is part of CEED: http://github.com/ceed 707b31e0eSJeremy L Thompson 8*c9c2c079SJeremy L Thompson #include <ceed.h> 907b31e0eSJeremy L Thompson 1007b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 1107b31e0eSJeremy L Thompson // Diagonal assembly kernels 1207b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 1307b31e0eSJeremy L Thompson typedef enum { 1407b31e0eSJeremy L Thompson /// Perform no evaluation (either because there is no data or it is already at 1507b31e0eSJeremy L Thompson /// quadrature points) 1607b31e0eSJeremy L Thompson CEED_EVAL_NONE = 0, 1707b31e0eSJeremy L Thompson /// Interpolate from nodes to quadrature points 1807b31e0eSJeremy L Thompson CEED_EVAL_INTERP = 1, 1907b31e0eSJeremy L Thompson /// Evaluate gradients at quadrature points from input in a nodal basis 2007b31e0eSJeremy L Thompson CEED_EVAL_GRAD = 2, 2107b31e0eSJeremy L Thompson /// Evaluate divergence at quadrature points from input in a nodal basis 2207b31e0eSJeremy L Thompson CEED_EVAL_DIV = 4, 2307b31e0eSJeremy L Thompson /// Evaluate curl at quadrature points from input in a nodal basis 2407b31e0eSJeremy L Thompson CEED_EVAL_CURL = 8, 2507b31e0eSJeremy L Thompson /// Using no input, evaluate quadrature weights on the reference element 2607b31e0eSJeremy L Thompson CEED_EVAL_WEIGHT = 16, 2707b31e0eSJeremy L Thompson } CeedEvalMode; 2807b31e0eSJeremy L Thompson 2907b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 3007b31e0eSJeremy L Thompson // Get Basis Emode Pointer 3107b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 3207b31e0eSJeremy L Thompson extern "C" __device__ void CeedOperatorGetBasisPointer_Hip(const CeedScalar **basisptr, 3307b31e0eSJeremy L Thompson CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp, 3407b31e0eSJeremy L Thompson const CeedScalar *grad) { 3507b31e0eSJeremy L Thompson switch (emode) { 3607b31e0eSJeremy L Thompson case CEED_EVAL_NONE: 3707b31e0eSJeremy L Thompson *basisptr = identity; 3807b31e0eSJeremy L Thompson break; 3907b31e0eSJeremy L Thompson case CEED_EVAL_INTERP: 4007b31e0eSJeremy L Thompson *basisptr = interp; 4107b31e0eSJeremy L Thompson break; 4207b31e0eSJeremy L Thompson case CEED_EVAL_GRAD: 4307b31e0eSJeremy L Thompson *basisptr = grad; 4407b31e0eSJeremy L Thompson break; 4507b31e0eSJeremy L Thompson case CEED_EVAL_WEIGHT: 4607b31e0eSJeremy L Thompson case CEED_EVAL_DIV: 4707b31e0eSJeremy L Thompson case CEED_EVAL_CURL: 4807b31e0eSJeremy L Thompson break; // Caught by QF Assembly 4907b31e0eSJeremy L Thompson } 5007b31e0eSJeremy L Thompson } 5107b31e0eSJeremy L Thompson 5207b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 5307b31e0eSJeremy L Thompson // Core code for diagonal assembly 5407b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 5507b31e0eSJeremy L Thompson __device__ void diagonalCore(const CeedInt nelem, 5669bf922dSJeremy L Thompson const bool pointBlock, const CeedScalar *identity, 5707b31e0eSJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 5807b31e0eSJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 5907b31e0eSJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 6007b31e0eSJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 6107b31e0eSJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 6207b31e0eSJeremy L Thompson const int tid = threadIdx.x; // running with P threads, tid is evec node 6369bf922dSJeremy L Thompson if (tid >= NNODES) return; 6407b31e0eSJeremy L Thompson 6507b31e0eSJeremy L Thompson // Compute the diagonal of B^T D B 6607b31e0eSJeremy L Thompson // Each element 6707b31e0eSJeremy L Thompson for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem; 6807b31e0eSJeremy L Thompson e += gridDim.x*blockDim.z) { 6907b31e0eSJeremy L Thompson CeedInt dout = -1; 7007b31e0eSJeremy L Thompson // Each basis eval mode pair 7107b31e0eSJeremy L Thompson for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) { 7207b31e0eSJeremy L Thompson const CeedScalar *bt = NULL; 7307b31e0eSJeremy L Thompson if (emodeout[eout] == CEED_EVAL_GRAD) 7407b31e0eSJeremy L Thompson dout += 1; 7507b31e0eSJeremy L Thompson CeedOperatorGetBasisPointer_Hip(&bt, emodeout[eout], identity, interpout, 7607b31e0eSJeremy L Thompson &gradout[dout*NQPTS*NNODES]); 7707b31e0eSJeremy L Thompson CeedInt din = -1; 7807b31e0eSJeremy L Thompson for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) { 7907b31e0eSJeremy L Thompson const CeedScalar *b = NULL; 8007b31e0eSJeremy L Thompson if (emodein[ein] == CEED_EVAL_GRAD) 8107b31e0eSJeremy L Thompson din += 1; 8207b31e0eSJeremy L Thompson CeedOperatorGetBasisPointer_Hip(&b, emodein[ein], identity, interpin, 8307b31e0eSJeremy L Thompson &gradin[din*NQPTS*NNODES]); 8407b31e0eSJeremy L Thompson // Each component 8507b31e0eSJeremy L Thompson for (CeedInt compOut = 0; compOut < NCOMP; compOut++) { 8607b31e0eSJeremy L Thompson // Each qpoint/node pair 8707b31e0eSJeremy L Thompson if (pointBlock) { 8807b31e0eSJeremy L Thompson // Point Block Diagonal 8907b31e0eSJeremy L Thompson for (CeedInt compIn = 0; compIn < NCOMP; compIn++) { 9007b31e0eSJeremy L Thompson CeedScalar evalue = 0.; 9107b31e0eSJeremy L Thompson for (CeedInt q = 0; q < NQPTS; q++) { 9207b31e0eSJeremy L Thompson const CeedScalar qfvalue = 9307b31e0eSJeremy L Thompson assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)* 9407b31e0eSJeremy L Thompson NCOMP+compOut)*nelem+e)*NQPTS+q]; 9507b31e0eSJeremy L Thompson evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; 9607b31e0eSJeremy L Thompson } 9707b31e0eSJeremy L Thompson elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue; 9807b31e0eSJeremy L Thompson } 9907b31e0eSJeremy L Thompson } else { 10007b31e0eSJeremy L Thompson // Diagonal Only 10107b31e0eSJeremy L Thompson CeedScalar evalue = 0.; 10207b31e0eSJeremy L Thompson for (CeedInt q = 0; q < NQPTS; q++) { 10307b31e0eSJeremy L Thompson const CeedScalar qfvalue = 10407b31e0eSJeremy L Thompson assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)* 10507b31e0eSJeremy L Thompson NCOMP+compOut)*nelem+e)*NQPTS+q]; 10607b31e0eSJeremy L Thompson evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; 10707b31e0eSJeremy L Thompson } 10807b31e0eSJeremy L Thompson elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue; 10907b31e0eSJeremy L Thompson } 11007b31e0eSJeremy L Thompson } 11107b31e0eSJeremy L Thompson } 11207b31e0eSJeremy L Thompson } 11307b31e0eSJeremy L Thompson } 11407b31e0eSJeremy L Thompson } 11507b31e0eSJeremy L Thompson 11607b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 11707b31e0eSJeremy L Thompson // Linear diagonal 11807b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 11907b31e0eSJeremy L Thompson extern "C" __global__ void linearDiagonal(const CeedInt nelem, 12069bf922dSJeremy L Thompson const CeedScalar *identity, 12107b31e0eSJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 12207b31e0eSJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 12307b31e0eSJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 12407b31e0eSJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 12507b31e0eSJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 12669bf922dSJeremy L Thompson diagonalCore(nelem, false, identity, interpin, gradin, interpout, 12707b31e0eSJeremy L Thompson gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 12807b31e0eSJeremy L Thompson } 12907b31e0eSJeremy L Thompson 13007b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 13107b31e0eSJeremy L Thompson // Linear point block diagonal 13207b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 13307b31e0eSJeremy L Thompson extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, 13469bf922dSJeremy L Thompson const CeedScalar *identity, 13507b31e0eSJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 13607b31e0eSJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 13707b31e0eSJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 13807b31e0eSJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 13907b31e0eSJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 14069bf922dSJeremy L Thompson diagonalCore(nelem, true, identity, interpin, gradin, interpout, 14107b31e0eSJeremy L Thompson gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 14207b31e0eSJeremy L Thompson } 14307b31e0eSJeremy L Thompson 14407b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 145