107b31e0eSJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 207b31e0eSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 307b31e0eSJeremy L Thompson // 407b31e0eSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 507b31e0eSJeremy L Thompson // 607b31e0eSJeremy L Thompson // This file is part of CEED: http://github.com/ceed 707b31e0eSJeremy L Thompson 8*b2165e7aSSebastian Grimberg /// @file 9*b2165e7aSSebastian Grimberg /// Internal header for CUDA operator diagonal assembly 10*b2165e7aSSebastian Grimberg #ifndef _ceed_cuda_ref_operator_assemble_diagonal_h 11*b2165e7aSSebastian Grimberg #define _ceed_cuda_ref_operator_assemble_diagonal_h 12*b2165e7aSSebastian Grimberg 13c9c2c079SJeremy L Thompson #include <ceed.h> 1407b31e0eSJeremy L Thompson 15f7c1b517Snbeams #if CEEDSIZE 16f7c1b517Snbeams typedef CeedSize IndexType; 17f7c1b517Snbeams #else 18f7c1b517Snbeams typedef CeedInt IndexType; 19f7c1b517Snbeams #endif 20f7c1b517Snbeams 2107b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 2207b31e0eSJeremy L Thompson // Get Basis Emode Pointer 2307b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 242b730f8bSJeremy L Thompson extern "C" __device__ void CeedOperatorGetBasisPointer_Cuda(const CeedScalar **basisptr, CeedEvalMode emode, const CeedScalar *identity, 252b730f8bSJeremy L Thompson const CeedScalar *interp, const CeedScalar *grad) { 2607b31e0eSJeremy L Thompson switch (emode) { 2707b31e0eSJeremy L Thompson case CEED_EVAL_NONE: 2807b31e0eSJeremy L Thompson *basisptr = identity; 2907b31e0eSJeremy L Thompson break; 3007b31e0eSJeremy L Thompson case CEED_EVAL_INTERP: 3107b31e0eSJeremy L Thompson *basisptr = interp; 3207b31e0eSJeremy L Thompson break; 3307b31e0eSJeremy L Thompson case CEED_EVAL_GRAD: 3407b31e0eSJeremy L Thompson *basisptr = grad; 3507b31e0eSJeremy L Thompson break; 3607b31e0eSJeremy L Thompson case CEED_EVAL_WEIGHT: 3707b31e0eSJeremy L Thompson case CEED_EVAL_DIV: 3807b31e0eSJeremy L Thompson case CEED_EVAL_CURL: 3907b31e0eSJeremy L Thompson break; // Caught by QF Assembly 4007b31e0eSJeremy L Thompson } 4107b31e0eSJeremy L Thompson } 4207b31e0eSJeremy L Thompson 4307b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 4407b31e0eSJeremy L Thompson // Core code for diagonal assembly 4507b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 462b730f8bSJeremy L Thompson __device__ void diagonalCore(const CeedInt nelem, const bool pointBlock, const CeedScalar *identity, const CeedScalar *interpin, 472b730f8bSJeremy L Thompson const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein, 482b730f8bSJeremy L Thompson const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) { 4907b31e0eSJeremy L Thompson const int tid = threadIdx.x; // running with P threads, tid is evec node 5069bf922dSJeremy L Thompson if (tid >= NNODES) return; 5107b31e0eSJeremy L Thompson 5207b31e0eSJeremy L Thompson // Compute the diagonal of B^T D B 5307b31e0eSJeremy L Thompson // Each element 54f7c1b517Snbeams for (IndexType e = blockIdx.x * blockDim.z + threadIdx.z; e < nelem; e += gridDim.x * blockDim.z) { 55f7c1b517Snbeams IndexType dout = -1; 5607b31e0eSJeremy L Thompson // Each basis eval mode pair 57f7c1b517Snbeams for (IndexType eout = 0; eout < NUMEMODEOUT; eout++) { 5807b31e0eSJeremy L Thompson const CeedScalar *bt = NULL; 592b730f8bSJeremy L Thompson if (emodeout[eout] == CEED_EVAL_GRAD) dout += 1; 602b730f8bSJeremy L Thompson CeedOperatorGetBasisPointer_Cuda(&bt, emodeout[eout], identity, interpout, &gradout[dout * NQPTS * NNODES]); 61f7c1b517Snbeams IndexType din = -1; 62f7c1b517Snbeams for (IndexType ein = 0; ein < NUMEMODEIN; ein++) { 6307b31e0eSJeremy L Thompson const CeedScalar *b = NULL; 642b730f8bSJeremy L Thompson if (emodein[ein] == CEED_EVAL_GRAD) din += 1; 652b730f8bSJeremy L Thompson CeedOperatorGetBasisPointer_Cuda(&b, emodein[ein], identity, interpin, &gradin[din * NQPTS * NNODES]); 6607b31e0eSJeremy L Thompson // Each component 67f7c1b517Snbeams for (IndexType compOut = 0; compOut < NCOMP; compOut++) { 6807b31e0eSJeremy L Thompson // Each qpoint/node pair 6907b31e0eSJeremy L Thompson if (pointBlock) { 7007b31e0eSJeremy L Thompson // Point Block Diagonal 71f7c1b517Snbeams for (IndexType compIn = 0; compIn < NCOMP; compIn++) { 7207b31e0eSJeremy L Thompson CeedScalar evalue = 0.; 73f7c1b517Snbeams for (IndexType q = 0; q < NQPTS; q++) { 7407b31e0eSJeremy L Thompson const CeedScalar qfvalue = 752b730f8bSJeremy L Thompson assembledqfarray[((((ein * NCOMP + compIn) * NUMEMODEOUT + eout) * NCOMP + compOut) * nelem + e) * NQPTS + q]; 7607b31e0eSJeremy L Thompson evalue += bt[q * NNODES + tid] * qfvalue * b[q * NNODES + tid]; 7707b31e0eSJeremy L Thompson } 7807b31e0eSJeremy L Thompson elemdiagarray[((compOut * NCOMP + compIn) * nelem + e) * NNODES + tid] += evalue; 7907b31e0eSJeremy L Thompson } 8007b31e0eSJeremy L Thompson } else { 8107b31e0eSJeremy L Thompson // Diagonal Only 8207b31e0eSJeremy L Thompson CeedScalar evalue = 0.; 83f7c1b517Snbeams for (IndexType q = 0; q < NQPTS; q++) { 8407b31e0eSJeremy L Thompson const CeedScalar qfvalue = 852b730f8bSJeremy L Thompson assembledqfarray[((((ein * NCOMP + compOut) * NUMEMODEOUT + eout) * NCOMP + compOut) * nelem + e) * NQPTS + q]; 8607b31e0eSJeremy L Thompson evalue += bt[q * NNODES + tid] * qfvalue * b[q * NNODES + tid]; 8707b31e0eSJeremy L Thompson } 8807b31e0eSJeremy L Thompson elemdiagarray[(compOut * nelem + e) * NNODES + tid] += evalue; 8907b31e0eSJeremy L Thompson } 9007b31e0eSJeremy L Thompson } 9107b31e0eSJeremy L Thompson } 9207b31e0eSJeremy L Thompson } 9307b31e0eSJeremy L Thompson } 9407b31e0eSJeremy L Thompson } 9507b31e0eSJeremy L Thompson 9607b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 9707b31e0eSJeremy L Thompson // Linear diagonal 9807b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 992b730f8bSJeremy L Thompson extern "C" __global__ void linearDiagonal(const CeedInt nelem, const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin, 1002b730f8bSJeremy L Thompson const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein, 1012b730f8bSJeremy L Thompson const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, 10207b31e0eSJeremy L Thompson CeedScalar *__restrict__ elemdiagarray) { 1032b730f8bSJeremy L Thompson diagonalCore(nelem, false, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 10407b31e0eSJeremy L Thompson } 10507b31e0eSJeremy L Thompson 10607b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 10707b31e0eSJeremy L Thompson // Linear point block diagonal 10807b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 1092b730f8bSJeremy L Thompson extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, const CeedScalar *identity, const CeedScalar *interpin, 1102b730f8bSJeremy L Thompson const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, 11107b31e0eSJeremy L Thompson const CeedEvalMode *emodein, const CeedEvalMode *emodeout, 1122b730f8bSJeremy L Thompson const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) { 1132b730f8bSJeremy L Thompson diagonalCore(nelem, true, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray); 11407b31e0eSJeremy L Thompson } 11507b31e0eSJeremy L Thompson 11607b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 117*b2165e7aSSebastian Grimberg 118*b2165e7aSSebastian Grimberg #endif 119