1*07b31e0eSJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2*07b31e0eSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3*07b31e0eSJeremy L Thompson // 4*07b31e0eSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 5*07b31e0eSJeremy L Thompson // 6*07b31e0eSJeremy L Thompson // This file is part of CEED: http://github.com/ceed 7*07b31e0eSJeremy L Thompson 8*07b31e0eSJeremy L Thompson #include <ceed/ceed.h> 9*07b31e0eSJeremy L Thompson 10*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 11*07b31e0eSJeremy L Thompson // Diagonal assembly kernels 12*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 13*07b31e0eSJeremy L Thompson typedef enum { 14*07b31e0eSJeremy L Thompson /// Perform no evaluation (either because there is no data or it is already at 15*07b31e0eSJeremy L Thompson /// quadrature points) 16*07b31e0eSJeremy L Thompson CEED_EVAL_NONE = 0, 17*07b31e0eSJeremy L Thompson /// Interpolate from nodes to quadrature points 18*07b31e0eSJeremy L Thompson CEED_EVAL_INTERP = 1, 19*07b31e0eSJeremy L Thompson /// Evaluate gradients at quadrature points from input in a nodal basis 20*07b31e0eSJeremy L Thompson CEED_EVAL_GRAD = 2, 21*07b31e0eSJeremy L Thompson /// Evaluate divergence at quadrature points from input in a nodal basis 22*07b31e0eSJeremy L Thompson CEED_EVAL_DIV = 4, 23*07b31e0eSJeremy L Thompson /// Evaluate curl at quadrature points from input in a nodal basis 24*07b31e0eSJeremy L Thompson CEED_EVAL_CURL = 8, 25*07b31e0eSJeremy L Thompson /// Using no input, evaluate quadrature weights on the reference element 26*07b31e0eSJeremy L Thompson CEED_EVAL_WEIGHT = 16, 27*07b31e0eSJeremy L Thompson } CeedEvalMode; 28*07b31e0eSJeremy L Thompson 29*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 30*07b31e0eSJeremy L Thompson // Get Basis Emode Pointer 31*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 32*07b31e0eSJeremy L Thompson extern "C" __device__ void CeedOperatorGetBasisPointer_Hip(const CeedScalar **basisptr, 33*07b31e0eSJeremy L Thompson CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp, 34*07b31e0eSJeremy L Thompson const CeedScalar *grad) { 35*07b31e0eSJeremy L Thompson switch (emode) { 36*07b31e0eSJeremy L Thompson case CEED_EVAL_NONE: 37*07b31e0eSJeremy L Thompson *basisptr = identity; 38*07b31e0eSJeremy L Thompson break; 39*07b31e0eSJeremy L Thompson case CEED_EVAL_INTERP: 40*07b31e0eSJeremy L Thompson *basisptr = interp; 41*07b31e0eSJeremy L Thompson break; 42*07b31e0eSJeremy L Thompson case CEED_EVAL_GRAD: 43*07b31e0eSJeremy L Thompson *basisptr = grad; 44*07b31e0eSJeremy L Thompson break; 45*07b31e0eSJeremy L Thompson case CEED_EVAL_WEIGHT: 46*07b31e0eSJeremy L Thompson case CEED_EVAL_DIV: 47*07b31e0eSJeremy L Thompson case CEED_EVAL_CURL: 48*07b31e0eSJeremy L Thompson break; // Caught by QF Assembly 49*07b31e0eSJeremy L Thompson } 50*07b31e0eSJeremy L Thompson } 51*07b31e0eSJeremy L Thompson 52*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 53*07b31e0eSJeremy L Thompson // Core code for diagonal assembly 54*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 55*07b31e0eSJeremy L Thompson __device__ void diagonalCore(const CeedInt nelem, 56*07b31e0eSJeremy L Thompson const CeedScalar maxnorm, const bool pointBlock, 57*07b31e0eSJeremy L Thompson const CeedScalar *identity, 58*07b31e0eSJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 59*07b31e0eSJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 60*07b31e0eSJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 61*07b31e0eSJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 62*07b31e0eSJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 63*07b31e0eSJeremy L Thompson const int tid = threadIdx.x; // running with P threads, tid is evec node 64*07b31e0eSJeremy L Thompson const CeedScalar qfvaluebound = maxnorm*1e-12; 65*07b31e0eSJeremy L Thompson 66*07b31e0eSJeremy L Thompson // Compute the diagonal of B^T D B 67*07b31e0eSJeremy L Thompson // Each element 68*07b31e0eSJeremy L Thompson for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem; 69*07b31e0eSJeremy L Thompson e += gridDim.x*blockDim.z) { 70*07b31e0eSJeremy L Thompson CeedInt dout = -1; 71*07b31e0eSJeremy L Thompson // Each basis eval mode pair 72*07b31e0eSJeremy L Thompson for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) { 73*07b31e0eSJeremy L Thompson const CeedScalar *bt = NULL; 74*07b31e0eSJeremy L Thompson if (emodeout[eout] == CEED_EVAL_GRAD) 75*07b31e0eSJeremy L Thompson dout += 1; 76*07b31e0eSJeremy L Thompson CeedOperatorGetBasisPointer_Hip(&bt, emodeout[eout], identity, interpout, 77*07b31e0eSJeremy L Thompson &gradout[dout*NQPTS*NNODES]); 78*07b31e0eSJeremy L Thompson CeedInt din = -1; 79*07b31e0eSJeremy L Thompson for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) { 80*07b31e0eSJeremy L Thompson const CeedScalar *b = NULL; 81*07b31e0eSJeremy L Thompson if (emodein[ein] == CEED_EVAL_GRAD) 82*07b31e0eSJeremy L Thompson din += 1; 83*07b31e0eSJeremy L Thompson CeedOperatorGetBasisPointer_Hip(&b, emodein[ein], identity, interpin, 84*07b31e0eSJeremy L Thompson &gradin[din*NQPTS*NNODES]); 85*07b31e0eSJeremy L Thompson // Each component 86*07b31e0eSJeremy L Thompson for (CeedInt compOut = 0; compOut < NCOMP; compOut++) { 87*07b31e0eSJeremy L Thompson // Each qpoint/node pair 88*07b31e0eSJeremy L Thompson if (pointBlock) { 89*07b31e0eSJeremy L Thompson // Point Block Diagonal 90*07b31e0eSJeremy L Thompson for (CeedInt compIn = 0; compIn < NCOMP; compIn++) { 91*07b31e0eSJeremy L Thompson CeedScalar evalue = 0.; 92*07b31e0eSJeremy L Thompson for (CeedInt q = 0; q < NQPTS; q++) { 93*07b31e0eSJeremy L Thompson const CeedScalar qfvalue = 94*07b31e0eSJeremy L Thompson assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)* 95*07b31e0eSJeremy L Thompson NCOMP+compOut)*nelem+e)*NQPTS+q]; 96*07b31e0eSJeremy L Thompson if (abs(qfvalue) > qfvaluebound) 97*07b31e0eSJeremy L Thompson evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; 98*07b31e0eSJeremy L Thompson } 99*07b31e0eSJeremy L Thompson elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue; 100*07b31e0eSJeremy L Thompson } 101*07b31e0eSJeremy L Thompson } else { 102*07b31e0eSJeremy L Thompson // Diagonal Only 103*07b31e0eSJeremy L Thompson CeedScalar evalue = 0.; 104*07b31e0eSJeremy L Thompson for (CeedInt q = 0; q < NQPTS; q++) { 105*07b31e0eSJeremy L Thompson const CeedScalar qfvalue = 106*07b31e0eSJeremy L Thompson assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)* 107*07b31e0eSJeremy L Thompson NCOMP+compOut)*nelem+e)*NQPTS+q]; 108*07b31e0eSJeremy L Thompson if (abs(qfvalue) > qfvaluebound) 109*07b31e0eSJeremy L Thompson evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; 110*07b31e0eSJeremy L Thompson } 111*07b31e0eSJeremy L Thompson elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue; 112*07b31e0eSJeremy L Thompson } 113*07b31e0eSJeremy L Thompson } 114*07b31e0eSJeremy L Thompson } 115*07b31e0eSJeremy L Thompson } 116*07b31e0eSJeremy L Thompson } 117*07b31e0eSJeremy L Thompson } 118*07b31e0eSJeremy L Thompson 119*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 120*07b31e0eSJeremy L Thompson // Linear diagonal 121*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 122*07b31e0eSJeremy L Thompson extern "C" __global__ void linearDiagonal(const CeedInt nelem, 123*07b31e0eSJeremy L Thompson const CeedScalar maxnorm, const CeedScalar *identity, 124*07b31e0eSJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 125*07b31e0eSJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 126*07b31e0eSJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 127*07b31e0eSJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 128*07b31e0eSJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 129*07b31e0eSJeremy L Thompson diagonalCore(nelem, maxnorm, false, identity, interpin, gradin, interpout, 130*07b31e0eSJeremy L Thompson gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 131*07b31e0eSJeremy L Thompson } 132*07b31e0eSJeremy L Thompson 133*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 134*07b31e0eSJeremy L Thompson // Linear point block diagonal 135*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 136*07b31e0eSJeremy L Thompson extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, 137*07b31e0eSJeremy L Thompson const CeedScalar maxnorm, const CeedScalar *identity, 138*07b31e0eSJeremy L Thompson const CeedScalar *interpin, const CeedScalar *gradin, 139*07b31e0eSJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, 140*07b31e0eSJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 141*07b31e0eSJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, 142*07b31e0eSJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 143*07b31e0eSJeremy L Thompson diagonalCore(nelem, maxnorm, true, identity, interpin, gradin, interpout, 144*07b31e0eSJeremy L Thompson gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 145*07b31e0eSJeremy L Thompson } 146*07b31e0eSJeremy L Thompson 147*07b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 148