xref: /libCEED/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h (revision b2165e7a2e371018feedcb47974a027ed47e0487)
107b31e0eSJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
207b31e0eSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
307b31e0eSJeremy L Thompson //
407b31e0eSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
507b31e0eSJeremy L Thompson //
607b31e0eSJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
707b31e0eSJeremy L Thompson 
8*b2165e7aSSebastian Grimberg /// @file
9*b2165e7aSSebastian Grimberg /// Internal header for CUDA operator diagonal assembly
10*b2165e7aSSebastian Grimberg #ifndef _ceed_cuda_ref_operator_assemble_diagonal_h
11*b2165e7aSSebastian Grimberg #define _ceed_cuda_ref_operator_assemble_diagonal_h
12*b2165e7aSSebastian Grimberg 
13c9c2c079SJeremy L Thompson #include <ceed.h>
1407b31e0eSJeremy L Thompson 
15f7c1b517Snbeams #if CEEDSIZE
16f7c1b517Snbeams typedef CeedSize IndexType;
17f7c1b517Snbeams #else
18f7c1b517Snbeams typedef CeedInt IndexType;
19f7c1b517Snbeams #endif
20f7c1b517Snbeams 
2107b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
2207b31e0eSJeremy L Thompson // Get Basis Emode Pointer
2307b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
242b730f8bSJeremy L Thompson extern "C" __device__ void CeedOperatorGetBasisPointer_Cuda(const CeedScalar **basisptr, CeedEvalMode emode, const CeedScalar *identity,
252b730f8bSJeremy L Thompson                                                             const CeedScalar *interp, const CeedScalar *grad) {
2607b31e0eSJeremy L Thompson   switch (emode) {
2707b31e0eSJeremy L Thompson     case CEED_EVAL_NONE:
2807b31e0eSJeremy L Thompson       *basisptr = identity;
2907b31e0eSJeremy L Thompson       break;
3007b31e0eSJeremy L Thompson     case CEED_EVAL_INTERP:
3107b31e0eSJeremy L Thompson       *basisptr = interp;
3207b31e0eSJeremy L Thompson       break;
3307b31e0eSJeremy L Thompson     case CEED_EVAL_GRAD:
3407b31e0eSJeremy L Thompson       *basisptr = grad;
3507b31e0eSJeremy L Thompson       break;
3607b31e0eSJeremy L Thompson     case CEED_EVAL_WEIGHT:
3707b31e0eSJeremy L Thompson     case CEED_EVAL_DIV:
3807b31e0eSJeremy L Thompson     case CEED_EVAL_CURL:
3907b31e0eSJeremy L Thompson       break;  // Caught by QF Assembly
4007b31e0eSJeremy L Thompson   }
4107b31e0eSJeremy L Thompson }
4207b31e0eSJeremy L Thompson 
4307b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
4407b31e0eSJeremy L Thompson // Core code for diagonal assembly
4507b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
462b730f8bSJeremy L Thompson __device__ void diagonalCore(const CeedInt nelem, const bool pointBlock, const CeedScalar *identity, const CeedScalar *interpin,
472b730f8bSJeremy L Thompson                              const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein,
482b730f8bSJeremy L Thompson                              const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) {
4907b31e0eSJeremy L Thompson   const int tid = threadIdx.x;  // running with P threads, tid is evec node
5069bf922dSJeremy L Thompson   if (tid >= NNODES) return;
5107b31e0eSJeremy L Thompson 
5207b31e0eSJeremy L Thompson   // Compute the diagonal of B^T D B
5307b31e0eSJeremy L Thompson   // Each element
54f7c1b517Snbeams   for (IndexType e = blockIdx.x * blockDim.z + threadIdx.z; e < nelem; e += gridDim.x * blockDim.z) {
55f7c1b517Snbeams     IndexType dout = -1;
5607b31e0eSJeremy L Thompson     // Each basis eval mode pair
57f7c1b517Snbeams     for (IndexType eout = 0; eout < NUMEMODEOUT; eout++) {
5807b31e0eSJeremy L Thompson       const CeedScalar *bt = NULL;
592b730f8bSJeremy L Thompson       if (emodeout[eout] == CEED_EVAL_GRAD) dout += 1;
602b730f8bSJeremy L Thompson       CeedOperatorGetBasisPointer_Cuda(&bt, emodeout[eout], identity, interpout, &gradout[dout * NQPTS * NNODES]);
61f7c1b517Snbeams       IndexType din = -1;
62f7c1b517Snbeams       for (IndexType ein = 0; ein < NUMEMODEIN; ein++) {
6307b31e0eSJeremy L Thompson         const CeedScalar *b = NULL;
642b730f8bSJeremy L Thompson         if (emodein[ein] == CEED_EVAL_GRAD) din += 1;
652b730f8bSJeremy L Thompson         CeedOperatorGetBasisPointer_Cuda(&b, emodein[ein], identity, interpin, &gradin[din * NQPTS * NNODES]);
6607b31e0eSJeremy L Thompson         // Each component
67f7c1b517Snbeams         for (IndexType compOut = 0; compOut < NCOMP; compOut++) {
6807b31e0eSJeremy L Thompson           // Each qpoint/node pair
6907b31e0eSJeremy L Thompson           if (pointBlock) {
7007b31e0eSJeremy L Thompson             // Point Block Diagonal
71f7c1b517Snbeams             for (IndexType compIn = 0; compIn < NCOMP; compIn++) {
7207b31e0eSJeremy L Thompson               CeedScalar evalue = 0.;
73f7c1b517Snbeams               for (IndexType q = 0; q < NQPTS; q++) {
7407b31e0eSJeremy L Thompson                 const CeedScalar qfvalue =
752b730f8bSJeremy L Thompson                     assembledqfarray[((((ein * NCOMP + compIn) * NUMEMODEOUT + eout) * NCOMP + compOut) * nelem + e) * NQPTS + q];
7607b31e0eSJeremy L Thompson                 evalue += bt[q * NNODES + tid] * qfvalue * b[q * NNODES + tid];
7707b31e0eSJeremy L Thompson               }
7807b31e0eSJeremy L Thompson               elemdiagarray[((compOut * NCOMP + compIn) * nelem + e) * NNODES + tid] += evalue;
7907b31e0eSJeremy L Thompson             }
8007b31e0eSJeremy L Thompson           } else {
8107b31e0eSJeremy L Thompson             // Diagonal Only
8207b31e0eSJeremy L Thompson             CeedScalar evalue = 0.;
83f7c1b517Snbeams             for (IndexType q = 0; q < NQPTS; q++) {
8407b31e0eSJeremy L Thompson               const CeedScalar qfvalue =
852b730f8bSJeremy L Thompson                   assembledqfarray[((((ein * NCOMP + compOut) * NUMEMODEOUT + eout) * NCOMP + compOut) * nelem + e) * NQPTS + q];
8607b31e0eSJeremy L Thompson               evalue += bt[q * NNODES + tid] * qfvalue * b[q * NNODES + tid];
8707b31e0eSJeremy L Thompson             }
8807b31e0eSJeremy L Thompson             elemdiagarray[(compOut * nelem + e) * NNODES + tid] += evalue;
8907b31e0eSJeremy L Thompson           }
9007b31e0eSJeremy L Thompson         }
9107b31e0eSJeremy L Thompson       }
9207b31e0eSJeremy L Thompson     }
9307b31e0eSJeremy L Thompson   }
9407b31e0eSJeremy L Thompson }
9507b31e0eSJeremy L Thompson 
9607b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
9707b31e0eSJeremy L Thompson // Linear diagonal
9807b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
992b730f8bSJeremy L Thompson extern "C" __global__ void linearDiagonal(const CeedInt nelem, const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin,
1002b730f8bSJeremy L Thompson                                           const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein,
1012b730f8bSJeremy L Thompson                                           const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray,
10207b31e0eSJeremy L Thompson                                           CeedScalar *__restrict__ elemdiagarray) {
1032b730f8bSJeremy L Thompson   diagonalCore(nelem, false, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
10407b31e0eSJeremy L Thompson }
10507b31e0eSJeremy L Thompson 
10607b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
10707b31e0eSJeremy L Thompson // Linear point block diagonal
10807b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
1092b730f8bSJeremy L Thompson extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, const CeedScalar *identity, const CeedScalar *interpin,
1102b730f8bSJeremy L Thompson                                                     const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout,
11107b31e0eSJeremy L Thompson                                                     const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
1122b730f8bSJeremy L Thompson                                                     const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) {
1132b730f8bSJeremy L Thompson   diagonalCore(nelem, true, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
11407b31e0eSJeremy L Thompson }
11507b31e0eSJeremy L Thompson 
11607b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
117*b2165e7aSSebastian Grimberg 
118*b2165e7aSSebastian Grimberg #endif
119