xref: /libCEED/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h (revision 74d1b6c4d74ab01eefbd8940b6e3a2db84651b5c)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Internal header for CUDA operator diagonal assembly
10 #ifndef CEED_CUDA_REF_OPERATOR_ASSEMBLE_DIAGONAL_H
11 #define CEED_CUDA_REF_OPERATOR_ASSEMBLE_DIAGONAL_H
12 
13 #include <ceed.h>
14 
15 #if USE_CEEDSIZE
16 typedef CeedSize IndexType;
17 #else
18 typedef CeedInt IndexType;
19 #endif
20 
21 //------------------------------------------------------------------------------
22 // Get basis pointer
23 //------------------------------------------------------------------------------
24 static __device__ __inline__ void GetBasisPointer(const CeedScalar **basis_ptr, CeedEvalMode eval_modes, const CeedScalar *identity,
25                                                   const CeedScalar *interp, const CeedScalar *grad, const CeedScalar *div, const CeedScalar *curl) {
26   switch (eval_modes) {
27     case CEED_EVAL_NONE:
28       *basis_ptr = identity;
29       break;
30     case CEED_EVAL_INTERP:
31       *basis_ptr = interp;
32       break;
33     case CEED_EVAL_GRAD:
34       *basis_ptr = grad;
35       break;
36     case CEED_EVAL_DIV:
37       *basis_ptr = div;
38       break;
39     case CEED_EVAL_CURL:
40       *basis_ptr = curl;
41       break;
42     case CEED_EVAL_WEIGHT:
43       break;  // Caught by QF assembly
44   }
45 }
46 
47 //------------------------------------------------------------------------------
48 // Core code for diagonal assembly
49 //------------------------------------------------------------------------------
50 static __device__ __inline__ void DiagonalCore(const CeedInt num_elem, const bool is_point_block, const CeedScalar *identity,
51                                                const CeedScalar *interp_in, const CeedScalar *grad_in, const CeedScalar *div_in,
52                                                const CeedScalar *curl_in, const CeedScalar *interp_out, const CeedScalar *grad_out,
53                                                const CeedScalar *div_out, const CeedScalar *curl_out, const CeedEvalMode *eval_modes_in,
54                                                const CeedEvalMode *eval_modes_out, const CeedScalar *__restrict__ assembled_qf_array,
55                                                CeedScalar *__restrict__ elem_diag_array) {
56   const int tid = threadIdx.x;  // Running with P threads
57 
58   if (tid >= NUM_NODES) return;
59 
60   // Compute the diagonal of B^T D B
61   // Each element
62   for (IndexType e = blockIdx.x * blockDim.z + threadIdx.z; e < num_elem; e += gridDim.x * blockDim.z) {
63     // Each basis eval mode pair
64     IndexType    d_out               = 0;
65     CeedEvalMode eval_modes_out_prev = CEED_EVAL_NONE;
66 
67     for (IndexType e_out = 0; e_out < NUM_EVAL_MODES_OUT; e_out++) {
68       IndexType         d_in               = 0;
69       CeedEvalMode      eval_modes_in_prev = CEED_EVAL_NONE;
70       const CeedScalar *b_t                = NULL;
71 
72       GetBasisPointer(&b_t, eval_modes_out[e_out], identity, interp_out, grad_out, div_out, curl_out);
73       if (e_out == 0 || eval_modes_out[e_out] != eval_modes_out_prev) d_out = 0;
74       else b_t = &b_t[(++d_out) * NUM_QPTS * NUM_NODES];
75       eval_modes_out_prev = eval_modes_out[e_out];
76 
77       for (IndexType e_in = 0; e_in < NUM_EVAL_MODES_IN; e_in++) {
78         const CeedScalar *b = NULL;
79 
80         GetBasisPointer(&b, eval_modes_in[e_in], identity, interp_in, grad_in, div_in, curl_in);
81         if (e_in == 0 || eval_modes_in[e_in] != eval_modes_in_prev) d_in = 0;
82         else b = &b[(++d_in) * NUM_QPTS * NUM_NODES];
83         eval_modes_in_prev = eval_modes_in[e_in];
84 
85         // Each component
86         for (IndexType comp_out = 0; comp_out < NUM_COMP; comp_out++) {
87           // Each qpoint/node pair
88           if (is_point_block) {
89             // Point block diagonal
90             for (IndexType comp_in = 0; comp_in < NUM_COMP; comp_in++) {
91               CeedScalar e_value = 0.;
92 
93               for (IndexType q = 0; q < NUM_QPTS; q++) {
94                 const CeedScalar qf_value =
95                     assembled_qf_array[((((e_in * NUM_COMP + comp_in) * NUM_EVAL_MODES_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) *
96                                            NUM_QPTS +
97                                        q];
98 
99                 e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid];
100               }
101               elem_diag_array[((comp_out * NUM_COMP + comp_in) * num_elem + e) * NUM_NODES + tid] += e_value;
102             }
103           } else {
104             // Diagonal only
105             CeedScalar e_value = 0.;
106 
107             for (IndexType q = 0; q < NUM_QPTS; q++) {
108               const CeedScalar qf_value =
109                   assembled_qf_array[((((e_in * NUM_COMP + comp_out) * NUM_EVAL_MODES_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) * NUM_QPTS +
110                                      q];
111 
112               e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid];
113             }
114             elem_diag_array[(comp_out * num_elem + e) * NUM_NODES + tid] += e_value;
115           }
116         }
117       }
118     }
119   }
120 }
121 
122 //------------------------------------------------------------------------------
123 // Linear diagonal
124 //------------------------------------------------------------------------------
125 extern "C" __global__ void LinearDiagonal(const CeedInt num_elem, const CeedScalar *identity, const CeedScalar *interp_in, const CeedScalar *grad_in,
126                                           const CeedScalar *div_in, const CeedScalar *curl_in, const CeedScalar *interp_out,
127                                           const CeedScalar *grad_out, const CeedScalar *div_out, const CeedScalar *curl_out,
128                                           const CeedEvalMode *eval_modes_in, const CeedEvalMode *eval_modes_out,
129                                           const CeedScalar *__restrict__ assembled_qf_array, CeedScalar *__restrict__ elem_diag_array) {
130   DiagonalCore(num_elem, false, identity, interp_in, grad_in, div_in, curl_in, interp_out, grad_out, div_out, curl_out, eval_modes_in, eval_modes_out,
131                assembled_qf_array, elem_diag_array);
132 }
133 
134 //------------------------------------------------------------------------------
135 // Linear point block diagonal
136 //------------------------------------------------------------------------------
137 extern "C" __global__ void LinearPointBlockDiagonal(const CeedInt num_elem, const CeedScalar *identity, const CeedScalar *interp_in,
138                                                     const CeedScalar *grad_in, const CeedScalar *div_in, const CeedScalar *curl_in,
139                                                     const CeedScalar *interp_out, const CeedScalar *grad_out, const CeedScalar *div_out,
140                                                     const CeedScalar *curl_out, const CeedEvalMode *eval_modes_in, const CeedEvalMode *eval_modes_out,
141                                                     const CeedScalar *__restrict__ assembled_qf_array, CeedScalar *__restrict__ elem_diag_array) {
142   DiagonalCore(num_elem, true, identity, interp_in, grad_in, div_in, curl_in, interp_out, grad_out, div_out, curl_out, eval_modes_in, eval_modes_out,
143                assembled_qf_array, elem_diag_array);
144 }
145 
146 //------------------------------------------------------------------------------
147 
148 #endif  // CEED_CUDA_REF_OPERATOR_ASSEMBLE_DIAGONAL_H
149