1*07b31e0eSJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2*07b31e0eSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3*07b31e0eSJeremy L Thompson // 4*07b31e0eSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 5*07b31e0eSJeremy L Thompson // 6*07b31e0eSJeremy L Thompson // This file is part of CEED: http://github.com/ceed 7*07b31e0eSJeremy L Thompson 8*07b31e0eSJeremy L Thompson #include <ceed/ceed.h> 9*07b31e0eSJeremy L Thompson 10*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 11*07b31e0eSJeremy L Thompson // Diagonal assembly kernels 12*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 13*07b31e0eSJeremy L Thompson 14*07b31e0eSJeremy L Thompson typedef enum { 15*07b31e0eSJeremy L Thompson /// Perform no evaluation (either because there is no data or it is already at 16*07b31e0eSJeremy L Thompson /// quadrature points) 17*07b31e0eSJeremy L Thompson CEED_EVAL_NONE = 0, 18*07b31e0eSJeremy L Thompson /// Interpolate from nodes to quadrature points 19*07b31e0eSJeremy L Thompson CEED_EVAL_INTERP = 1, 20*07b31e0eSJeremy L Thompson /// Evaluate gradients at quadrature points from input in a nodal basis 21*07b31e0eSJeremy L Thompson CEED_EVAL_GRAD = 2, 22*07b31e0eSJeremy L Thompson /// Evaluate divergence at quadrature points from input in a nodal basis 23*07b31e0eSJeremy L Thompson CEED_EVAL_DIV = 4, 24*07b31e0eSJeremy L Thompson /// Evaluate curl at quadrature points from input in a nodal basis 25*07b31e0eSJeremy L Thompson CEED_EVAL_CURL = 8, 26*07b31e0eSJeremy L Thompson /// Using no input, evaluate quadrature weights on the reference element 27*07b31e0eSJeremy L Thompson CEED_EVAL_WEIGHT = 16, 28*07b31e0eSJeremy L Thompson } CeedEvalMode; 29*07b31e0eSJeremy L Thompson 30*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 31*07b31e0eSJeremy L Thompson // Get Basis Emode Pointer 32*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 33*07b31e0eSJeremy L Thompson extern "C" __device__ void CeedOperatorGetBasisPointer_Cuda(const CeedScalar **basisptr, 34*07b31e0eSJeremy L Thompson CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp, 35*07b31e0eSJeremy L Thompson const CeedScalar *grad) { 36*07b31e0eSJeremy L Thompson switch (emode) { 37*07b31e0eSJeremy L Thompson case CEED_EVAL_NONE: 38*07b31e0eSJeremy L Thompson *basisptr = identity; 39*07b31e0eSJeremy L Thompson break; 40*07b31e0eSJeremy L Thompson case CEED_EVAL_INTERP: 41*07b31e0eSJeremy L Thompson *basisptr = interp; 42*07b31e0eSJeremy L Thompson break; 43*07b31e0eSJeremy L Thompson case CEED_EVAL_GRAD: 44*07b31e0eSJeremy L Thompson *basisptr = grad; 45*07b31e0eSJeremy L Thompson break; 46*07b31e0eSJeremy L Thompson case CEED_EVAL_WEIGHT: 47*07b31e0eSJeremy L Thompson case CEED_EVAL_DIV: 48*07b31e0eSJeremy L Thompson case CEED_EVAL_CURL: 49*07b31e0eSJeremy L Thompson break; // Caught by QF Assembly 50*07b31e0eSJeremy L Thompson } 51*07b31e0eSJeremy L Thompson } 52*07b31e0eSJeremy L Thompson 53*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 54*07b31e0eSJeremy L Thompson // Core code for diagonal assembly 55*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 56*07b31e0eSJeremy L Thompson __device__ void diagonalCore(const CeedInt nelem, 57*07b31e0eSJeremy L Thompson const CeedScalar maxnorm, const bool pointBlock, 58*07b31e0eSJeremy L Thompson const CeedScalar *identity, 59*07b31e0eSJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 60*07b31e0eSJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 61*07b31e0eSJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 62*07b31e0eSJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 63*07b31e0eSJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 64*07b31e0eSJeremy L Thompson const int tid = threadIdx.x; // running with P threads, tid is evec node 65*07b31e0eSJeremy L Thompson const CeedScalar qfvaluebound = maxnorm*1e-12; 66*07b31e0eSJeremy L Thompson 67*07b31e0eSJeremy L Thompson // Compute the diagonal of B^T D B 68*07b31e0eSJeremy L Thompson // Each element 69*07b31e0eSJeremy L Thompson for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem; 70*07b31e0eSJeremy L Thompson e += gridDim.x*blockDim.z) { 71*07b31e0eSJeremy L Thompson CeedInt dout = -1; 72*07b31e0eSJeremy L Thompson // Each basis eval mode pair 73*07b31e0eSJeremy L Thompson for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) { 74*07b31e0eSJeremy L Thompson const CeedScalar *bt = NULL; 75*07b31e0eSJeremy L Thompson if (emodeout[eout] == CEED_EVAL_GRAD) 76*07b31e0eSJeremy L Thompson dout += 1; 77*07b31e0eSJeremy L Thompson CeedOperatorGetBasisPointer_Cuda(&bt, emodeout[eout], identity, interpout, 78*07b31e0eSJeremy L Thompson &gradout[dout*NQPTS*NNODES]); 79*07b31e0eSJeremy L Thompson CeedInt din = -1; 80*07b31e0eSJeremy L Thompson for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) { 81*07b31e0eSJeremy L Thompson const CeedScalar *b = NULL; 82*07b31e0eSJeremy L Thompson if (emodein[ein] == CEED_EVAL_GRAD) 83*07b31e0eSJeremy L Thompson din += 1; 84*07b31e0eSJeremy L Thompson CeedOperatorGetBasisPointer_Cuda(&b, emodein[ein], identity, interpin, 85*07b31e0eSJeremy L Thompson &gradin[din*NQPTS*NNODES]); 86*07b31e0eSJeremy L Thompson // Each component 87*07b31e0eSJeremy L Thompson for (CeedInt compOut = 0; compOut < NCOMP; compOut++) { 88*07b31e0eSJeremy L Thompson // Each qpoint/node pair 89*07b31e0eSJeremy L Thompson if (pointBlock) { 90*07b31e0eSJeremy L Thompson // Point Block Diagonal 91*07b31e0eSJeremy L Thompson for (CeedInt compIn = 0; compIn < NCOMP; compIn++) { 92*07b31e0eSJeremy L Thompson CeedScalar evalue = 0.; 93*07b31e0eSJeremy L Thompson for (CeedInt q = 0; q < NQPTS; q++) { 94*07b31e0eSJeremy L Thompson const CeedScalar qfvalue = 95*07b31e0eSJeremy L Thompson assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)* 96*07b31e0eSJeremy L Thompson NCOMP+compOut)*nelem+e)*NQPTS+q]; 97*07b31e0eSJeremy L Thompson if (abs(qfvalue) > qfvaluebound) 98*07b31e0eSJeremy L Thompson evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; 99*07b31e0eSJeremy L Thompson } 100*07b31e0eSJeremy L Thompson elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue; 101*07b31e0eSJeremy L Thompson } 102*07b31e0eSJeremy L Thompson } else { 103*07b31e0eSJeremy L Thompson // Diagonal Only 104*07b31e0eSJeremy L Thompson CeedScalar evalue = 0.; 105*07b31e0eSJeremy L Thompson for (CeedInt q = 0; q < NQPTS; q++) { 106*07b31e0eSJeremy L Thompson const CeedScalar qfvalue = 107*07b31e0eSJeremy L Thompson assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)* 108*07b31e0eSJeremy L Thompson NCOMP+compOut)*nelem+e)*NQPTS+q]; 109*07b31e0eSJeremy L Thompson if (abs(qfvalue) > qfvaluebound) 110*07b31e0eSJeremy L Thompson evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; 111*07b31e0eSJeremy L Thompson } 112*07b31e0eSJeremy L Thompson elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue; 113*07b31e0eSJeremy L Thompson } 114*07b31e0eSJeremy L Thompson } 115*07b31e0eSJeremy L Thompson } 116*07b31e0eSJeremy L Thompson } 117*07b31e0eSJeremy L Thompson } 118*07b31e0eSJeremy L Thompson } 119*07b31e0eSJeremy L Thompson 120*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 121*07b31e0eSJeremy L Thompson // Linear diagonal 122*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 123*07b31e0eSJeremy L Thompson extern "C" __global__ void linearDiagonal(const CeedInt nelem, 124*07b31e0eSJeremy L Thompson const CeedScalar maxnorm, const CeedScalar *identity, 125*07b31e0eSJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 126*07b31e0eSJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 127*07b31e0eSJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 128*07b31e0eSJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 129*07b31e0eSJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 130*07b31e0eSJeremy L Thompson diagonalCore(nelem, maxnorm, false, identity, interpin, gradin, interpout, 131*07b31e0eSJeremy L Thompson gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 132*07b31e0eSJeremy L Thompson } 133*07b31e0eSJeremy L Thompson 134*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 135*07b31e0eSJeremy L Thompson // Linear point block diagonal 136*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 137*07b31e0eSJeremy L Thompson extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, 138*07b31e0eSJeremy L Thompson const CeedScalar maxnorm, const CeedScalar *identity, 139*07b31e0eSJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 140*07b31e0eSJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 141*07b31e0eSJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 142*07b31e0eSJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 143*07b31e0eSJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 144*07b31e0eSJeremy L Thompson diagonalCore(nelem, maxnorm, true, identity, interpin, gradin, interpout, 145*07b31e0eSJeremy L Thompson gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 146*07b31e0eSJeremy L Thompson } 147*07b31e0eSJeremy L Thompson 148*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 149