1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 10 //------------------------------------------------------------------------------ 11 // Diagonal assembly kernels 12 //------------------------------------------------------------------------------ 13 typedef enum { 14 /// Perform no evaluation (either because there is no data or it is already at 15 /// quadrature points) 16 CEED_EVAL_NONE = 0, 17 /// Interpolate from nodes to quadrature points 18 CEED_EVAL_INTERP = 1, 19 /// Evaluate gradients at quadrature points from input in a nodal basis 20 CEED_EVAL_GRAD = 2, 21 /// Evaluate divergence at quadrature points from input in a nodal basis 22 CEED_EVAL_DIV = 4, 23 /// Evaluate curl at quadrature points from input in a nodal basis 24 CEED_EVAL_CURL = 8, 25 /// Using no input, evaluate quadrature weights on the reference element 26 CEED_EVAL_WEIGHT = 16, 27 } CeedEvalMode; 28 29 //------------------------------------------------------------------------------ 30 // Get Basis Emode Pointer 31 //------------------------------------------------------------------------------ 32 extern "C" __device__ void CeedOperatorGetBasisPointer_Hip(const CeedScalar **basisptr, 33 CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp, 34 const CeedScalar *grad) { 35 switch (emode) { 36 case CEED_EVAL_NONE: 37 *basisptr = identity; 38 break; 39 case CEED_EVAL_INTERP: 40 *basisptr = interp; 41 break; 42 case CEED_EVAL_GRAD: 43 *basisptr = grad; 44 break; 45 case CEED_EVAL_WEIGHT: 46 case CEED_EVAL_DIV: 47 case CEED_EVAL_CURL: 48 break; // Caught by QF Assembly 49 } 50 } 51 52 //------------------------------------------------------------------------------ 53 // Core code for diagonal assembly 54 //------------------------------------------------------------------------------ 55 __device__ void diagonalCore(const CeedInt nelem, 56 const bool pointBlock, const CeedScalar *identity, 57 const CeedScalar *interpin, const CeedScalar *gradin, 58 const CeedScalar *interpout, const CeedScalar *gradout, 59 const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 60 const CeedScalar *__restrict__ assembledqfarray, 61 CeedScalar *__restrict__ elemdiagarray) { 62 const int tid = threadIdx.x; // running with P threads, tid is evec node 63 if (tid >= NNODES) return; 64 65 // Compute the diagonal of B^T D B 66 // Each element 67 for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem; 68 e += gridDim.x*blockDim.z) { 69 CeedInt dout = -1; 70 // Each basis eval mode pair 71 for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) { 72 const CeedScalar *bt = NULL; 73 if (emodeout[eout] == CEED_EVAL_GRAD) 74 dout += 1; 75 CeedOperatorGetBasisPointer_Hip(&bt, emodeout[eout], identity, interpout, 76 &gradout[dout*NQPTS*NNODES]); 77 CeedInt din = -1; 78 for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) { 79 const CeedScalar *b = NULL; 80 if (emodein[ein] == CEED_EVAL_GRAD) 81 din += 1; 82 CeedOperatorGetBasisPointer_Hip(&b, emodein[ein], identity, interpin, 83 &gradin[din*NQPTS*NNODES]); 84 // Each component 85 for (CeedInt compOut = 0; compOut < NCOMP; compOut++) { 86 // Each qpoint/node pair 87 if (pointBlock) { 88 // Point Block Diagonal 89 for (CeedInt compIn = 0; compIn < NCOMP; compIn++) { 90 CeedScalar evalue = 0.; 91 for (CeedInt q = 0; q < NQPTS; q++) { 92 const CeedScalar qfvalue = 93 assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)* 94 NCOMP+compOut)*nelem+e)*NQPTS+q]; 95 evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; 96 } 97 elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue; 98 } 99 } else { 100 // Diagonal Only 101 CeedScalar evalue = 0.; 102 for (CeedInt q = 0; q < NQPTS; q++) { 103 const CeedScalar qfvalue = 104 assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)* 105 NCOMP+compOut)*nelem+e)*NQPTS+q]; 106 evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; 107 } 108 elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue; 109 } 110 } 111 } 112 } 113 } 114 } 115 116 //------------------------------------------------------------------------------ 117 // Linear diagonal 118 //------------------------------------------------------------------------------ 119 extern "C" __global__ void linearDiagonal(const CeedInt nelem, 120 const CeedScalar *identity, 121 const CeedScalar *interpin, const CeedScalar *gradin, 122 const CeedScalar *interpout, const CeedScalar *gradout, 123 const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 124 const CeedScalar *__restrict__ assembledqfarray, 125 CeedScalar *__restrict__ elemdiagarray) { 126 diagonalCore(nelem, false, identity, interpin, gradin, interpout, 127 gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 128 } 129 130 //------------------------------------------------------------------------------ 131 // Linear point block diagonal 132 //------------------------------------------------------------------------------ 133 extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, 134 const CeedScalar *identity, 135 const CeedScalar *interpin, const CeedScalar *gradin, 136 const CeedScalar *interpout, const CeedScalar *gradout, 137 const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 138 const CeedScalar *__restrict__ assembledqfarray, 139 CeedScalar *__restrict__ elemdiagarray) { 140 diagonalCore(nelem, true, identity, interpin, gradin, interpout, 141 gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 142 } 143 144 //------------------------------------------------------------------------------ 145