xref: /libCEED/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h (revision f7c1b517c323a719e436874ff6ee79be51545b0d)
107b31e0eSJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
207b31e0eSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
307b31e0eSJeremy L Thompson //
407b31e0eSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
507b31e0eSJeremy L Thompson //
607b31e0eSJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
707b31e0eSJeremy L Thompson 
8c9c2c079SJeremy L Thompson #include <ceed.h>
907b31e0eSJeremy L Thompson 
10*f7c1b517Snbeams #if CEEDSIZE
11*f7c1b517Snbeams typedef CeedSize IndexType;
12*f7c1b517Snbeams #else
13*f7c1b517Snbeams typedef CeedInt IndexType;
14*f7c1b517Snbeams #endif
15*f7c1b517Snbeams 
1607b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
1707b31e0eSJeremy L Thompson // Get Basis Emode Pointer
1807b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
192b730f8bSJeremy L Thompson extern "C" __device__ void CeedOperatorGetBasisPointer_Cuda(const CeedScalar **basisptr, CeedEvalMode emode, const CeedScalar *identity,
202b730f8bSJeremy L Thompson                                                             const CeedScalar *interp, const CeedScalar *grad) {
2107b31e0eSJeremy L Thompson   switch (emode) {
2207b31e0eSJeremy L Thompson     case CEED_EVAL_NONE:
2307b31e0eSJeremy L Thompson       *basisptr = identity;
2407b31e0eSJeremy L Thompson       break;
2507b31e0eSJeremy L Thompson     case CEED_EVAL_INTERP:
2607b31e0eSJeremy L Thompson       *basisptr = interp;
2707b31e0eSJeremy L Thompson       break;
2807b31e0eSJeremy L Thompson     case CEED_EVAL_GRAD:
2907b31e0eSJeremy L Thompson       *basisptr = grad;
3007b31e0eSJeremy L Thompson       break;
3107b31e0eSJeremy L Thompson     case CEED_EVAL_WEIGHT:
3207b31e0eSJeremy L Thompson     case CEED_EVAL_DIV:
3307b31e0eSJeremy L Thompson     case CEED_EVAL_CURL:
3407b31e0eSJeremy L Thompson       break;  // Caught by QF Assembly
3507b31e0eSJeremy L Thompson   }
3607b31e0eSJeremy L Thompson }
3707b31e0eSJeremy L Thompson 
3807b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
3907b31e0eSJeremy L Thompson // Core code for diagonal assembly
4007b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
412b730f8bSJeremy L Thompson __device__ void diagonalCore(const CeedInt nelem, const bool pointBlock, const CeedScalar *identity, const CeedScalar *interpin,
422b730f8bSJeremy L Thompson                              const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein,
432b730f8bSJeremy L Thompson                              const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) {
4407b31e0eSJeremy L Thompson   const int tid = threadIdx.x;  // running with P threads, tid is evec node
4569bf922dSJeremy L Thompson   if (tid >= NNODES) return;
4607b31e0eSJeremy L Thompson 
4707b31e0eSJeremy L Thompson   // Compute the diagonal of B^T D B
4807b31e0eSJeremy L Thompson   // Each element
49*f7c1b517Snbeams   for (IndexType e = blockIdx.x * blockDim.z + threadIdx.z; e < nelem; e += gridDim.x * blockDim.z) {
50*f7c1b517Snbeams     IndexType dout = -1;
5107b31e0eSJeremy L Thompson     // Each basis eval mode pair
52*f7c1b517Snbeams     for (IndexType eout = 0; eout < NUMEMODEOUT; eout++) {
5307b31e0eSJeremy L Thompson       const CeedScalar *bt = NULL;
542b730f8bSJeremy L Thompson       if (emodeout[eout] == CEED_EVAL_GRAD) dout += 1;
552b730f8bSJeremy L Thompson       CeedOperatorGetBasisPointer_Cuda(&bt, emodeout[eout], identity, interpout, &gradout[dout * NQPTS * NNODES]);
56*f7c1b517Snbeams       IndexType din = -1;
57*f7c1b517Snbeams       for (IndexType ein = 0; ein < NUMEMODEIN; ein++) {
5807b31e0eSJeremy L Thompson         const CeedScalar *b = NULL;
592b730f8bSJeremy L Thompson         if (emodein[ein] == CEED_EVAL_GRAD) din += 1;
602b730f8bSJeremy L Thompson         CeedOperatorGetBasisPointer_Cuda(&b, emodein[ein], identity, interpin, &gradin[din * NQPTS * NNODES]);
6107b31e0eSJeremy L Thompson         // Each component
62*f7c1b517Snbeams         for (IndexType compOut = 0; compOut < NCOMP; compOut++) {
6307b31e0eSJeremy L Thompson           // Each qpoint/node pair
6407b31e0eSJeremy L Thompson           if (pointBlock) {
6507b31e0eSJeremy L Thompson             // Point Block Diagonal
66*f7c1b517Snbeams             for (IndexType compIn = 0; compIn < NCOMP; compIn++) {
6707b31e0eSJeremy L Thompson               CeedScalar evalue = 0.;
68*f7c1b517Snbeams               for (IndexType q = 0; q < NQPTS; q++) {
6907b31e0eSJeremy L Thompson                 const CeedScalar qfvalue =
702b730f8bSJeremy L Thompson                     assembledqfarray[((((ein * NCOMP + compIn) * NUMEMODEOUT + eout) * NCOMP + compOut) * nelem + e) * NQPTS + q];
7107b31e0eSJeremy L Thompson                 evalue += bt[q * NNODES + tid] * qfvalue * b[q * NNODES + tid];
7207b31e0eSJeremy L Thompson               }
7307b31e0eSJeremy L Thompson               elemdiagarray[((compOut * NCOMP + compIn) * nelem + e) * NNODES + tid] += evalue;
7407b31e0eSJeremy L Thompson             }
7507b31e0eSJeremy L Thompson           } else {
7607b31e0eSJeremy L Thompson             // Diagonal Only
7707b31e0eSJeremy L Thompson             CeedScalar evalue = 0.;
78*f7c1b517Snbeams             for (IndexType q = 0; q < NQPTS; q++) {
7907b31e0eSJeremy L Thompson               const CeedScalar qfvalue =
802b730f8bSJeremy L Thompson                   assembledqfarray[((((ein * NCOMP + compOut) * NUMEMODEOUT + eout) * NCOMP + compOut) * nelem + e) * NQPTS + q];
8107b31e0eSJeremy L Thompson               evalue += bt[q * NNODES + tid] * qfvalue * b[q * NNODES + tid];
8207b31e0eSJeremy L Thompson             }
8307b31e0eSJeremy L Thompson             elemdiagarray[(compOut * nelem + e) * NNODES + tid] += evalue;
8407b31e0eSJeremy L Thompson           }
8507b31e0eSJeremy L Thompson         }
8607b31e0eSJeremy L Thompson       }
8707b31e0eSJeremy L Thompson     }
8807b31e0eSJeremy L Thompson   }
8907b31e0eSJeremy L Thompson }
9007b31e0eSJeremy L Thompson 
9107b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
9207b31e0eSJeremy L Thompson // Linear diagonal
9307b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
942b730f8bSJeremy L Thompson extern "C" __global__ void linearDiagonal(const CeedInt nelem, const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin,
952b730f8bSJeremy L Thompson                                           const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein,
962b730f8bSJeremy L Thompson                                           const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray,
9707b31e0eSJeremy L Thompson                                           CeedScalar *__restrict__ elemdiagarray) {
982b730f8bSJeremy L Thompson   diagonalCore(nelem, false, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
9907b31e0eSJeremy L Thompson }
10007b31e0eSJeremy L Thompson 
10107b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
10207b31e0eSJeremy L Thompson // Linear point block diagonal
10307b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
1042b730f8bSJeremy L Thompson extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, const CeedScalar *identity, const CeedScalar *interpin,
1052b730f8bSJeremy L Thompson                                                     const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout,
10607b31e0eSJeremy L Thompson                                                     const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
1072b730f8bSJeremy L Thompson                                                     const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) {
1082b730f8bSJeremy L Thompson   diagonalCore(nelem, true, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
10907b31e0eSJeremy L Thompson }
11007b31e0eSJeremy L Thompson 
11107b31e0eSJeremy L Thompson //------------------------------------------------------------------------------
112