1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED: http://github.com/ceed
7
8 /// @file
9 /// Internal header for HIP operator diagonal assembly
10 #include <ceed/types.h>
11
12 #if USE_CEEDSIZE
13 typedef CeedSize IndexType;
14 #else
15 typedef CeedInt IndexType;
16 #endif
17
18 //------------------------------------------------------------------------------
19 // Get basis pointer
20 //------------------------------------------------------------------------------
GetBasisPointer(const CeedScalar ** basis_ptr,CeedEvalMode eval_modes,const CeedScalar * identity,const CeedScalar * interp,const CeedScalar * grad,const CeedScalar * div,const CeedScalar * curl)21 static __device__ __inline__ void GetBasisPointer(const CeedScalar **basis_ptr, CeedEvalMode eval_modes, const CeedScalar *identity,
22 const CeedScalar *interp, const CeedScalar *grad, const CeedScalar *div, const CeedScalar *curl) {
23 switch (eval_modes) {
24 case CEED_EVAL_NONE:
25 *basis_ptr = identity;
26 break;
27 case CEED_EVAL_INTERP:
28 *basis_ptr = interp;
29 break;
30 case CEED_EVAL_GRAD:
31 *basis_ptr = grad;
32 break;
33 case CEED_EVAL_DIV:
34 *basis_ptr = div;
35 break;
36 case CEED_EVAL_CURL:
37 *basis_ptr = curl;
38 break;
39 case CEED_EVAL_WEIGHT:
40 break; // Caught by QF assembly
41 }
42 }
43
44 //------------------------------------------------------------------------------
45 // Core code for diagonal assembly
46 //------------------------------------------------------------------------------
__launch_bounds__(BLOCK_SIZE)47 extern "C" __launch_bounds__(BLOCK_SIZE) __global__
48 void LinearDiagonal(const CeedInt num_elem, const CeedScalar *identity, const CeedScalar *interp_in, const CeedScalar *grad_in,
49 const CeedScalar *div_in, const CeedScalar *curl_in, const CeedScalar *interp_out, const CeedScalar *grad_out,
50 const CeedScalar *div_out, const CeedScalar *curl_out, const CeedEvalMode *eval_modes_in, const CeedEvalMode *eval_modes_out,
51 const CeedScalar *__restrict__ assembled_qf_array, CeedScalar *__restrict__ elem_diag_array) {
52 const int tid = threadIdx.x; // Running with P threads
53
54 if (tid >= NUM_NODES) return;
55
56 // Compute the diagonal of B^T D B
57 // Each element
58 for (IndexType e = blockIdx.x * blockDim.z + threadIdx.z; e < num_elem; e += gridDim.x * blockDim.z) {
59 // Each basis eval mode pair
60 IndexType d_out = 0;
61 CeedEvalMode eval_modes_out_prev = CEED_EVAL_NONE;
62
63 for (IndexType e_out = 0; e_out < NUM_EVAL_MODES_OUT; e_out++) {
64 IndexType d_in = 0;
65 CeedEvalMode eval_modes_in_prev = CEED_EVAL_NONE;
66 const CeedScalar *b_t = NULL;
67
68 GetBasisPointer(&b_t, eval_modes_out[e_out], identity, interp_out, grad_out, div_out, curl_out);
69 if (e_out == 0 || eval_modes_out[e_out] != eval_modes_out_prev) d_out = 0;
70 else b_t = &b_t[(++d_out) * NUM_QPTS * NUM_NODES];
71 eval_modes_out_prev = eval_modes_out[e_out];
72
73 for (IndexType e_in = 0; e_in < NUM_EVAL_MODES_IN; e_in++) {
74 const CeedScalar *b = NULL;
75
76 GetBasisPointer(&b, eval_modes_in[e_in], identity, interp_in, grad_in, div_in, curl_in);
77 if (e_in == 0 || eval_modes_in[e_in] != eval_modes_in_prev) d_in = 0;
78 else b = &b[(++d_in) * NUM_QPTS * NUM_NODES];
79 eval_modes_in_prev = eval_modes_in[e_in];
80
81 // Each component
82 for (IndexType comp_out = 0; comp_out < NUM_COMP; comp_out++) {
83 #if USE_POINT_BLOCK
84 // Point block diagonal
85 for (IndexType comp_in = 0; comp_in < NUM_COMP; comp_in++) {
86 CeedScalar e_value = 0.;
87
88 // Each qpoint/node pair
89 for (IndexType q = 0; q < NUM_QPTS; q++) {
90 const CeedScalar qf_value =
91 assembled_qf_array[((((e_in * NUM_COMP + comp_in) * NUM_EVAL_MODES_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) * NUM_QPTS +
92 q];
93
94 e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid];
95 }
96 elem_diag_array[((comp_out * NUM_COMP + comp_in) * num_elem + e) * NUM_NODES + tid] += e_value;
97 }
98 #else
99 // Diagonal only
100 CeedScalar e_value = 0.;
101
102 // Each qpoint/node pair
103 for (IndexType q = 0; q < NUM_QPTS; q++) {
104 const CeedScalar qf_value =
105 assembled_qf_array[((((e_in * NUM_COMP + comp_out) * NUM_EVAL_MODES_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) * NUM_QPTS +
106 q];
107
108 e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid];
109 }
110 elem_diag_array[(comp_out * num_elem + e) * NUM_NODES + tid] += e_value;
111 #endif
112 }
113 }
114 }
115 }
116 }
117