xref: /libCEED/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h (revision ca7355308a14805110a7d98dabf1f658d5ec16d5)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Internal header for CUDA operator diagonal assembly
10 #ifndef CEED_CUDA_REF_OPERATOR_ASSEMBLE_DIAGONAL_H
11 #define CEED_CUDA_REF_OPERATOR_ASSEMBLE_DIAGONAL_H
12 
13 #include <ceed.h>
14 
15 #if USE_CEEDSIZE
16 typedef CeedSize IndexType;
17 #else
18 typedef CeedInt IndexType;
19 #endif
20 
21 //------------------------------------------------------------------------------
22 // Get Basis Emode Pointer
23 //------------------------------------------------------------------------------
24 extern "C" __device__ void CeedOperatorGetBasisPointer_Cuda(const CeedScalar **basis_ptr, CeedEvalMode e_mode, const CeedScalar *identity,
25                                                             const CeedScalar *interp, const CeedScalar *grad) {
26   switch (e_mode) {
27     case CEED_EVAL_NONE:
28       *basis_ptr = identity;
29       break;
30     case CEED_EVAL_INTERP:
31       *basis_ptr = interp;
32       break;
33     case CEED_EVAL_GRAD:
34       *basis_ptr = grad;
35       break;
36     case CEED_EVAL_WEIGHT:
37     case CEED_EVAL_DIV:
38     case CEED_EVAL_CURL:
39       break;  // Caught by QF Assembly
40   }
41 }
42 
43 //------------------------------------------------------------------------------
44 // Core code for diagonal assembly
45 //------------------------------------------------------------------------------
46 __device__ void diagonalCore(const CeedInt num_elem, const bool is_point_block, const CeedScalar *identity, const CeedScalar *interp_in,
47                              const CeedScalar *grad_in, const CeedScalar *interp_out, const CeedScalar *grad_out, const CeedEvalMode *e_mode_in,
48                              const CeedEvalMode *e_mode_out, const CeedScalar *__restrict__ assembled_qf_array,
49                              CeedScalar *__restrict__ elem_diag_array) {
50   const int tid = threadIdx.x;  // running with P threads, tid is evec node
51   if (tid >= NUM_NODES) return;
52 
53   // Compute the diagonal of B^T D B
54   // Each element
55   for (IndexType e = blockIdx.x * blockDim.z + threadIdx.z; e < num_elem; e += gridDim.x * blockDim.z) {
56     IndexType d_out = -1;
57 
58     // Each basis eval mode pair
59     for (IndexType e_out = 0; e_out < NUM_E_MODE_OUT; e_out++) {
60       const CeedScalar *b_t = NULL;
61 
62       if (e_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1;
63       CeedOperatorGetBasisPointer_Cuda(&b_t, e_mode_out[e_out], identity, interp_out, &grad_out[d_out * NUM_QPTS * NUM_NODES]);
64       IndexType d_in = -1;
65 
66       for (IndexType e_in = 0; e_in < NUM_E_MODE_IN; e_in++) {
67         const CeedScalar *b = NULL;
68 
69         if (e_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1;
70         CeedOperatorGetBasisPointer_Cuda(&b, e_mode_in[e_in], identity, interp_in, &grad_in[d_in * NUM_QPTS * NUM_NODES]);
71         // Each component
72         for (IndexType comp_out = 0; comp_out < NUM_COMP; comp_out++) {
73           // Each qpoint/node pair
74           if (is_point_block) {
75             // Point Block Diagonal
76             for (IndexType comp_in = 0; comp_in < NUM_COMP; comp_in++) {
77               CeedScalar e_value = 0.;
78 
79               for (IndexType q = 0; q < NUM_QPTS; q++) {
80                 const CeedScalar qf_value =
81                     assembled_qf_array[((((e_in * NUM_COMP + comp_in) * NUM_E_MODE_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) * NUM_QPTS +
82                                        q];
83 
84                 e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid];
85               }
86               elem_diag_array[((comp_out * NUM_COMP + comp_in) * num_elem + e) * NUM_NODES + tid] += e_value;
87             }
88           } else {
89             // Diagonal Only
90             CeedScalar e_value = 0.;
91 
92             for (IndexType q = 0; q < NUM_QPTS; q++) {
93               const CeedScalar qf_value =
94                   assembled_qf_array[((((e_in * NUM_COMP + comp_out) * NUM_E_MODE_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) * NUM_QPTS + q];
95 
96               e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid];
97             }
98             elem_diag_array[(comp_out * num_elem + e) * NUM_NODES + tid] += e_value;
99           }
100         }
101       }
102     }
103   }
104 }
105 
106 //------------------------------------------------------------------------------
107 // Linear diagonal
108 //------------------------------------------------------------------------------
109 extern "C" __global__ void linearDiagonal(const CeedInt num_elem, const CeedScalar *identity, const CeedScalar *interp_in, const CeedScalar *grad_in,
110                                           const CeedScalar *interp_out, const CeedScalar *grad_out, const CeedEvalMode *e_mode_in,
111                                           const CeedEvalMode *e_mode_out, const CeedScalar *__restrict__ assembled_qf_array,
112                                           CeedScalar *__restrict__ elem_diag_array) {
113   diagonalCore(num_elem, false, identity, interp_in, grad_in, interp_out, grad_out, e_mode_in, e_mode_out, assembled_qf_array, elem_diag_array);
114 }
115 
116 //------------------------------------------------------------------------------
117 // Linear point block diagonal
118 //------------------------------------------------------------------------------
119 extern "C" __global__ void linearPointBlockDiagonal(const CeedInt num_elem, const CeedScalar *identity, const CeedScalar *interp_in,
120                                                     const CeedScalar *grad_in, const CeedScalar *interp_out, const CeedScalar *grad_out,
121                                                     const CeedEvalMode *e_mode_in, const CeedEvalMode *e_mode_out,
122                                                     const CeedScalar *__restrict__ assembled_qf_array, CeedScalar *__restrict__ elem_diag_array) {
123   diagonalCore(num_elem, true, identity, interp_in, grad_in, interp_out, grad_out, e_mode_in, e_mode_out, assembled_qf_array, elem_diag_array);
124 }
125 
126 //------------------------------------------------------------------------------
127 
128 #endif  // CEED_CUDA_REF_OPERATOR_ASSEMBLE_DIAGONAL_H
129