1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 /// @file 9 /// Internal header for HIP operator diagonal assembly 10 #ifndef CEED_HIP_REF_OPERATOR_ASSEMBLE_DIAGONAL_H 11 #define CEED_HIP_REF_OPERATOR_ASSEMBLE_DIAGONAL_H 12 13 #include <ceed.h> 14 15 #if USE_CEEDSIZE 16 typedef CeedSize IndexType; 17 #else 18 typedef CeedInt IndexType; 19 #endif 20 21 //------------------------------------------------------------------------------ 22 // Get basis pointer 23 //------------------------------------------------------------------------------ 24 static __device__ __inline__ void GetBasisPointer(const CeedScalar **basis_ptr, CeedEvalMode eval_modes, const CeedScalar *identity, 25 const CeedScalar *interp, const CeedScalar *grad, const CeedScalar *div, const CeedScalar *curl) { 26 switch (eval_modes) { 27 case CEED_EVAL_NONE: 28 *basis_ptr = identity; 29 break; 30 case CEED_EVAL_INTERP: 31 *basis_ptr = interp; 32 break; 33 case CEED_EVAL_GRAD: 34 *basis_ptr = grad; 35 break; 36 case CEED_EVAL_DIV: 37 *basis_ptr = div; 38 break; 39 case CEED_EVAL_CURL: 40 *basis_ptr = curl; 41 break; 42 case CEED_EVAL_WEIGHT: 43 break; // Caught by QF assembly 44 } 45 } 46 47 //------------------------------------------------------------------------------ 48 // Core code for diagonal assembly 49 //------------------------------------------------------------------------------ 50 static __device__ __inline__ void DiagonalCore(const CeedInt num_elem, const bool is_point_block, const CeedScalar *identity, 51 const CeedScalar *interp_in, const CeedScalar *grad_in, const CeedScalar *div_in, 52 const CeedScalar *curl_in, const CeedScalar *interp_out, const CeedScalar *grad_out, 53 const CeedScalar *div_out, const CeedScalar *curl_out, const CeedEvalMode *eval_modes_in, 54 const CeedEvalMode *eval_modes_out, const CeedScalar *__restrict__ assembled_qf_array, 55 CeedScalar *__restrict__ elem_diag_array) { 56 const int tid = threadIdx.x; // Running with P threads 57 58 if (tid >= NUM_NODES) return; 59 60 // Compute the diagonal of B^T D B 61 // Each element 62 for (IndexType e = blockIdx.x * blockDim.z + threadIdx.z; e < num_elem; e += gridDim.x * blockDim.z) { 63 // Each basis eval mode pair 64 IndexType d_out = 0; 65 CeedEvalMode eval_modes_out_prev = CEED_EVAL_NONE; 66 67 for (IndexType e_out = 0; e_out < NUM_EVAL_MODES_OUT; e_out++) { 68 IndexType d_in = 0; 69 CeedEvalMode eval_modes_in_prev = CEED_EVAL_NONE; 70 const CeedScalar *b_t = NULL; 71 72 GetBasisPointer(&b_t, eval_modes_out[e_out], identity, interp_out, grad_out, div_out, curl_out); 73 if (e_out == 0 || eval_modes_out[e_out] != eval_modes_out_prev) d_out = 0; 74 else b_t = &b_t[(++d_out) * NUM_QPTS * NUM_NODES]; 75 eval_modes_out_prev = eval_modes_out[e_out]; 76 77 for (IndexType e_in = 0; e_in < NUM_EVAL_MODES_IN; e_in++) { 78 const CeedScalar *b = NULL; 79 80 GetBasisPointer(&b, eval_modes_in[e_in], identity, interp_in, grad_in, div_in, curl_in); 81 if (e_in == 0 || eval_modes_in[e_in] != eval_modes_in_prev) d_in = 0; 82 else b = &b[(++d_in) * NUM_QPTS * NUM_NODES]; 83 eval_modes_in_prev = eval_modes_in[e_in]; 84 85 // Each component 86 for (IndexType comp_out = 0; comp_out < NUM_COMP; comp_out++) { 87 // Each qpoint/node pair 88 if (is_point_block) { 89 // Point block diagonal 90 for (IndexType comp_in = 0; comp_in < NUM_COMP; comp_in++) { 91 CeedScalar e_value = 0.; 92 93 for (IndexType q = 0; q < NUM_QPTS; q++) { 94 const CeedScalar qf_value = 95 assembled_qf_array[((((e_in * NUM_COMP + comp_in) * NUM_EVAL_MODES_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) * 96 NUM_QPTS + 97 q]; 98 99 e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid]; 100 } 101 elem_diag_array[((comp_out * NUM_COMP + comp_in) * num_elem + e) * NUM_NODES + tid] += e_value; 102 } 103 } else { 104 // Diagonal only 105 CeedScalar e_value = 0.; 106 107 for (IndexType q = 0; q < NUM_QPTS; q++) { 108 const CeedScalar qf_value = 109 assembled_qf_array[((((e_in * NUM_COMP + comp_out) * NUM_EVAL_MODES_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) * NUM_QPTS + 110 q]; 111 112 e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid]; 113 } 114 elem_diag_array[(comp_out * num_elem + e) * NUM_NODES + tid] += e_value; 115 } 116 } 117 } 118 } 119 } 120 } 121 122 //------------------------------------------------------------------------------ 123 // Linear diagonal 124 //------------------------------------------------------------------------------ 125 extern "C" __global__ void LinearDiagonal(const CeedInt num_elem, const CeedScalar *identity, const CeedScalar *interp_in, const CeedScalar *grad_in, 126 const CeedScalar *div_in, const CeedScalar *curl_in, const CeedScalar *interp_out, 127 const CeedScalar *grad_out, const CeedScalar *div_out, const CeedScalar *curl_out, 128 const CeedEvalMode *eval_modes_in, const CeedEvalMode *eval_modes_out, 129 const CeedScalar *__restrict__ assembled_qf_array, CeedScalar *__restrict__ elem_diag_array) { 130 DiagonalCore(num_elem, false, identity, interp_in, grad_in, div_in, curl_in, interp_out, grad_out, div_out, curl_out, eval_modes_in, eval_modes_out, 131 assembled_qf_array, elem_diag_array); 132 } 133 134 //------------------------------------------------------------------------------ 135 // Linear point block diagonal 136 //------------------------------------------------------------------------------ 137 extern "C" __global__ void LinearPointBlockDiagonal(const CeedInt num_elem, const CeedScalar *identity, const CeedScalar *interp_in, 138 const CeedScalar *grad_in, const CeedScalar *div_in, const CeedScalar *curl_in, 139 const CeedScalar *interp_out, const CeedScalar *grad_out, const CeedScalar *div_out, 140 const CeedScalar *curl_out, const CeedEvalMode *eval_modes_in, const CeedEvalMode *eval_modes_out, 141 const CeedScalar *__restrict__ assembled_qf_array, CeedScalar *__restrict__ elem_diag_array) { 142 DiagonalCore(num_elem, true, identity, interp_in, grad_in, div_in, curl_in, interp_out, grad_out, div_out, curl_out, eval_modes_in, eval_modes_out, 143 assembled_qf_array, elem_diag_array); 144 } 145 146 //------------------------------------------------------------------------------ 147 148 #endif // CEED_HIP_REF_OPERATOR_ASSEMBLE_DIAGONAL_H 149