1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 /// @file 9 /// Internal header for CUDA operator diagonal assembly 10 #ifndef CEED_CUDA_REF_OPERATOR_ASSEMBLE_DIAGONAL_H 11 #define CEED_CUDA_REF_OPERATOR_ASSEMBLE_DIAGONAL_H 12 13 #include <ceed.h> 14 15 #if USE_CEEDSIZE 16 typedef CeedSize IndexType; 17 #else 18 typedef CeedInt IndexType; 19 #endif 20 21 //------------------------------------------------------------------------------ 22 // Get Basis Emode Pointer 23 //------------------------------------------------------------------------------ 24 extern "C" __device__ void CeedOperatorGetBasisPointer_Cuda(const CeedScalar **basis_ptr, CeedEvalMode e_mode, const CeedScalar *identity, 25 const CeedScalar *interp, const CeedScalar *grad) { 26 switch (e_mode) { 27 case CEED_EVAL_NONE: 28 *basis_ptr = identity; 29 break; 30 case CEED_EVAL_INTERP: 31 *basis_ptr = interp; 32 break; 33 case CEED_EVAL_GRAD: 34 *basis_ptr = grad; 35 break; 36 case CEED_EVAL_WEIGHT: 37 case CEED_EVAL_DIV: 38 case CEED_EVAL_CURL: 39 break; // Caught by QF Assembly 40 } 41 } 42 43 //------------------------------------------------------------------------------ 44 // Core code for diagonal assembly 45 //------------------------------------------------------------------------------ 46 __device__ void diagonalCore(const CeedInt num_elem, const bool is_point_block, const CeedScalar *identity, const CeedScalar *interp_in, 47 const CeedScalar *grad_in, const CeedScalar *interp_out, const CeedScalar *grad_out, const CeedEvalMode *e_mode_in, 48 const CeedEvalMode *e_mode_out, const CeedScalar *__restrict__ assembled_qf_array, 49 CeedScalar *__restrict__ elem_diag_array) { 50 const int tid = threadIdx.x; // running with P threads, tid is evec node 51 if (tid >= NUM_NODES) return; 52 53 // Compute the diagonal of B^T D B 54 // Each element 55 for (IndexType e = blockIdx.x * blockDim.z + threadIdx.z; e < num_elem; e += gridDim.x * blockDim.z) { 56 IndexType d_out = -1; 57 58 // Each basis eval mode pair 59 for (IndexType e_out = 0; e_out < NUM_E_MODE_OUT; e_out++) { 60 const CeedScalar *b_t = NULL; 61 62 if (e_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1; 63 CeedOperatorGetBasisPointer_Cuda(&b_t, e_mode_out[e_out], identity, interp_out, &grad_out[d_out * NUM_QPTS * NUM_NODES]); 64 IndexType d_in = -1; 65 66 for (IndexType e_in = 0; e_in < NUM_E_MODE_IN; e_in++) { 67 const CeedScalar *b = NULL; 68 69 if (e_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1; 70 CeedOperatorGetBasisPointer_Cuda(&b, e_mode_in[e_in], identity, interp_in, &grad_in[d_in * NUM_QPTS * NUM_NODES]); 71 // Each component 72 for (IndexType comp_out = 0; comp_out < NUM_COMP; comp_out++) { 73 // Each qpoint/node pair 74 if (is_point_block) { 75 // Point Block Diagonal 76 for (IndexType comp_in = 0; comp_in < NUM_COMP; comp_in++) { 77 CeedScalar e_value = 0.; 78 79 for (IndexType q = 0; q < NUM_QPTS; q++) { 80 const CeedScalar qf_value = 81 assembled_qf_array[((((e_in * NUM_COMP + comp_in) * NUM_E_MODE_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) * NUM_QPTS + 82 q]; 83 84 e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid]; 85 } 86 elem_diag_array[((comp_out * NUM_COMP + comp_in) * num_elem + e) * NUM_NODES + tid] += e_value; 87 } 88 } else { 89 // Diagonal Only 90 CeedScalar e_value = 0.; 91 92 for (IndexType q = 0; q < NUM_QPTS; q++) { 93 const CeedScalar qf_value = 94 assembled_qf_array[((((e_in * NUM_COMP + comp_out) * NUM_E_MODE_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) * NUM_QPTS + q]; 95 96 e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid]; 97 } 98 elem_diag_array[(comp_out * num_elem + e) * NUM_NODES + tid] += e_value; 99 } 100 } 101 } 102 } 103 } 104 } 105 106 //------------------------------------------------------------------------------ 107 // Linear diagonal 108 //------------------------------------------------------------------------------ 109 extern "C" __global__ void linearDiagonal(const CeedInt num_elem, const CeedScalar *identity, const CeedScalar *interp_in, const CeedScalar *grad_in, 110 const CeedScalar *interp_out, const CeedScalar *grad_out, const CeedEvalMode *e_mode_in, 111 const CeedEvalMode *e_mode_out, const CeedScalar *__restrict__ assembled_qf_array, 112 CeedScalar *__restrict__ elem_diag_array) { 113 diagonalCore(num_elem, false, identity, interp_in, grad_in, interp_out, grad_out, e_mode_in, e_mode_out, assembled_qf_array, elem_diag_array); 114 } 115 116 //------------------------------------------------------------------------------ 117 // Linear point block diagonal 118 //------------------------------------------------------------------------------ 119 extern "C" __global__ void linearPointBlockDiagonal(const CeedInt num_elem, const CeedScalar *identity, const CeedScalar *interp_in, 120 const CeedScalar *grad_in, const CeedScalar *interp_out, const CeedScalar *grad_out, 121 const CeedEvalMode *e_mode_in, const CeedEvalMode *e_mode_out, 122 const CeedScalar *__restrict__ assembled_qf_array, CeedScalar *__restrict__ elem_diag_array) { 123 diagonalCore(num_elem, true, identity, interp_in, grad_in, interp_out, grad_out, e_mode_in, e_mode_out, assembled_qf_array, elem_diag_array); 124 } 125 126 //------------------------------------------------------------------------------ 127 128 #endif // CEED_CUDA_REF_OPERATOR_ASSEMBLE_DIAGONAL_H 129