107b31e0eSJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 207b31e0eSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 307b31e0eSJeremy L Thompson // 407b31e0eSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 507b31e0eSJeremy L Thompson // 607b31e0eSJeremy L Thompson // This file is part of CEED: http://github.com/ceed 707b31e0eSJeremy L Thompson 8c9c2c079SJeremy L Thompson #include <ceed.h> 907b31e0eSJeremy L Thompson 1007b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 1107b31e0eSJeremy L Thompson // Diagonal assembly kernels 1207b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 1307b31e0eSJeremy L Thompson typedef enum { 1407b31e0eSJeremy L Thompson /// Perform no evaluation (either because there is no data or it is already at 1507b31e0eSJeremy L Thompson /// quadrature points) 1607b31e0eSJeremy L Thompson CEED_EVAL_NONE = 0, 1707b31e0eSJeremy L Thompson /// Interpolate from nodes to quadrature points 1807b31e0eSJeremy L Thompson CEED_EVAL_INTERP = 1, 1907b31e0eSJeremy L Thompson /// Evaluate gradients at quadrature points from input in a nodal basis 2007b31e0eSJeremy L Thompson CEED_EVAL_GRAD = 2, 2107b31e0eSJeremy L Thompson /// Evaluate divergence at quadrature points from input in a nodal basis 2207b31e0eSJeremy L Thompson CEED_EVAL_DIV = 4, 2307b31e0eSJeremy L Thompson /// Evaluate curl at quadrature points from input in a nodal basis 2407b31e0eSJeremy L Thompson CEED_EVAL_CURL = 8, 2507b31e0eSJeremy L Thompson /// Using no input, evaluate quadrature weights on the reference element 2607b31e0eSJeremy L Thompson CEED_EVAL_WEIGHT = 16, 2707b31e0eSJeremy L Thompson } CeedEvalMode; 2807b31e0eSJeremy L Thompson 2907b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 3007b31e0eSJeremy L Thompson // Get Basis Emode Pointer 3107b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 32*2b730f8bSJeremy L Thompson extern "C" __device__ void CeedOperatorGetBasisPointer_Hip(const CeedScalar **basisptr, CeedEvalMode emode, const CeedScalar *identity, 33*2b730f8bSJeremy L Thompson const CeedScalar *interp, const CeedScalar *grad) { 3407b31e0eSJeremy L Thompson switch (emode) { 3507b31e0eSJeremy L Thompson case CEED_EVAL_NONE: 3607b31e0eSJeremy L Thompson *basisptr = identity; 3707b31e0eSJeremy L Thompson break; 3807b31e0eSJeremy L Thompson case CEED_EVAL_INTERP: 3907b31e0eSJeremy L Thompson *basisptr = interp; 4007b31e0eSJeremy L Thompson break; 4107b31e0eSJeremy L Thompson case CEED_EVAL_GRAD: 4207b31e0eSJeremy L Thompson *basisptr = grad; 4307b31e0eSJeremy L Thompson break; 4407b31e0eSJeremy L Thompson case CEED_EVAL_WEIGHT: 4507b31e0eSJeremy L Thompson case CEED_EVAL_DIV: 4607b31e0eSJeremy L Thompson case CEED_EVAL_CURL: 4707b31e0eSJeremy L Thompson break; // Caught by QF Assembly 4807b31e0eSJeremy L Thompson } 4907b31e0eSJeremy L Thompson } 5007b31e0eSJeremy L Thompson 5107b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 5207b31e0eSJeremy L Thompson // Core code for diagonal assembly 5307b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 54*2b730f8bSJeremy L Thompson __device__ void diagonalCore(const CeedInt nelem, const bool pointBlock, const CeedScalar *identity, const CeedScalar *interpin, 55*2b730f8bSJeremy L Thompson const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein, 56*2b730f8bSJeremy L Thompson const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) { 5707b31e0eSJeremy L Thompson const int tid = threadIdx.x; // running with P threads, tid is evec node 5869bf922dSJeremy L Thompson if (tid >= NNODES) return; 5907b31e0eSJeremy L Thompson 6007b31e0eSJeremy L Thompson // Compute the diagonal of B^T D B 6107b31e0eSJeremy L Thompson // Each element 62*2b730f8bSJeremy L Thompson for (CeedInt e = blockIdx.x * blockDim.z + threadIdx.z; e < nelem; e += gridDim.x * blockDim.z) { 6307b31e0eSJeremy L Thompson CeedInt dout = -1; 6407b31e0eSJeremy L Thompson // Each basis eval mode pair 6507b31e0eSJeremy L Thompson for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) { 6607b31e0eSJeremy L Thompson const CeedScalar *bt = NULL; 67*2b730f8bSJeremy L Thompson if (emodeout[eout] == CEED_EVAL_GRAD) dout += 1; 68*2b730f8bSJeremy L Thompson CeedOperatorGetBasisPointer_Hip(&bt, emodeout[eout], identity, interpout, &gradout[dout * NQPTS * NNODES]); 6907b31e0eSJeremy L Thompson CeedInt din = -1; 7007b31e0eSJeremy L Thompson for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) { 7107b31e0eSJeremy L Thompson const CeedScalar *b = NULL; 72*2b730f8bSJeremy L Thompson if (emodein[ein] == CEED_EVAL_GRAD) din += 1; 73*2b730f8bSJeremy L Thompson CeedOperatorGetBasisPointer_Hip(&b, emodein[ein], identity, interpin, &gradin[din * NQPTS * NNODES]); 7407b31e0eSJeremy L Thompson // Each component 7507b31e0eSJeremy L Thompson for (CeedInt compOut = 0; compOut < NCOMP; compOut++) { 7607b31e0eSJeremy L Thompson // Each qpoint/node pair 7707b31e0eSJeremy L Thompson if (pointBlock) { 7807b31e0eSJeremy L Thompson // Point Block Diagonal 7907b31e0eSJeremy L Thompson for (CeedInt compIn = 0; compIn < NCOMP; compIn++) { 8007b31e0eSJeremy L Thompson CeedScalar evalue = 0.; 8107b31e0eSJeremy L Thompson for (CeedInt q = 0; q < NQPTS; q++) { 8207b31e0eSJeremy L Thompson const CeedScalar qfvalue = 83*2b730f8bSJeremy L Thompson assembledqfarray[((((ein * NCOMP + compIn) * NUMEMODEOUT + eout) * NCOMP + compOut) * nelem + e) * NQPTS + q]; 8407b31e0eSJeremy L Thompson evalue += bt[q * NNODES + tid] * qfvalue * b[q * NNODES + tid]; 8507b31e0eSJeremy L Thompson } 8607b31e0eSJeremy L Thompson elemdiagarray[((compOut * NCOMP + compIn) * nelem + e) * NNODES + tid] += evalue; 8707b31e0eSJeremy L Thompson } 8807b31e0eSJeremy L Thompson } else { 8907b31e0eSJeremy L Thompson // Diagonal Only 9007b31e0eSJeremy L Thompson CeedScalar evalue = 0.; 9107b31e0eSJeremy L Thompson for (CeedInt q = 0; q < NQPTS; q++) { 9207b31e0eSJeremy L Thompson const CeedScalar qfvalue = 93*2b730f8bSJeremy L Thompson assembledqfarray[((((ein * NCOMP + compOut) * NUMEMODEOUT + eout) * NCOMP + compOut) * nelem + e) * NQPTS + q]; 9407b31e0eSJeremy L Thompson evalue += bt[q * NNODES + tid] * qfvalue * b[q * NNODES + tid]; 9507b31e0eSJeremy L Thompson } 9607b31e0eSJeremy L Thompson elemdiagarray[(compOut * nelem + e) * NNODES + tid] += evalue; 9707b31e0eSJeremy L Thompson } 9807b31e0eSJeremy L Thompson } 9907b31e0eSJeremy L Thompson } 10007b31e0eSJeremy L Thompson } 10107b31e0eSJeremy L Thompson } 10207b31e0eSJeremy L Thompson } 10307b31e0eSJeremy L Thompson 10407b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 10507b31e0eSJeremy L Thompson // Linear diagonal 10607b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 107*2b730f8bSJeremy L Thompson extern "C" __global__ void linearDiagonal(const CeedInt nelem, const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin, 108*2b730f8bSJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein, 109*2b730f8bSJeremy L Thompson const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, 11007b31e0eSJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 111*2b730f8bSJeremy L Thompson diagonalCore(nelem, false, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 11207b31e0eSJeremy L Thompson } 11307b31e0eSJeremy L Thompson 11407b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 11507b31e0eSJeremy L Thompson // Linear point block diagonal 11607b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 117*2b730f8bSJeremy L Thompson extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, const CeedScalar *identity, const CeedScalar *interpin, 118*2b730f8bSJeremy L Thompson const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, 11907b31e0eSJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 120*2b730f8bSJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) { 121*2b730f8bSJeremy L Thompson diagonalCore(nelem, true, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 12207b31e0eSJeremy L Thompson } 12307b31e0eSJeremy L Thompson 12407b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 125