15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 207b31e0eSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 307b31e0eSJeremy L Thompson // 407b31e0eSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 507b31e0eSJeremy L Thompson // 607b31e0eSJeremy L Thompson // This file is part of CEED: http://github.com/ceed 707b31e0eSJeremy L Thompson 8b2165e7aSSebastian Grimberg /// @file 9b2165e7aSSebastian Grimberg /// Internal header for HIP operator diagonal assembly 10b2165e7aSSebastian Grimberg 11*c0b5abf0SJeremy L Thompson #include <ceed/types.h> 1207b31e0eSJeremy L Thompson 13004e4986SSebastian Grimberg #if USE_CEEDSIZE 149330daecSnbeams typedef CeedSize IndexType; 159330daecSnbeams #else 169330daecSnbeams typedef CeedInt IndexType; 179330daecSnbeams #endif 189330daecSnbeams 1907b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 20004e4986SSebastian Grimberg // Get basis pointer 2107b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 22004e4986SSebastian Grimberg static __device__ __inline__ void GetBasisPointer(const CeedScalar **basis_ptr, CeedEvalMode eval_modes, const CeedScalar *identity, 23004e4986SSebastian Grimberg const CeedScalar *interp, const CeedScalar *grad, const CeedScalar *div, const CeedScalar *curl) { 24004e4986SSebastian Grimberg switch (eval_modes) { 2507b31e0eSJeremy L Thompson case CEED_EVAL_NONE: 26004e4986SSebastian Grimberg *basis_ptr = identity; 2707b31e0eSJeremy L Thompson break; 2807b31e0eSJeremy L Thompson case CEED_EVAL_INTERP: 29004e4986SSebastian Grimberg *basis_ptr = interp; 3007b31e0eSJeremy L Thompson break; 3107b31e0eSJeremy L Thompson case CEED_EVAL_GRAD: 32004e4986SSebastian Grimberg *basis_ptr = grad; 33004e4986SSebastian Grimberg break; 34004e4986SSebastian Grimberg case CEED_EVAL_DIV: 35004e4986SSebastian Grimberg *basis_ptr = div; 36004e4986SSebastian Grimberg break; 37004e4986SSebastian Grimberg case CEED_EVAL_CURL: 38004e4986SSebastian Grimberg *basis_ptr = curl; 3907b31e0eSJeremy L Thompson break; 4007b31e0eSJeremy L Thompson case CEED_EVAL_WEIGHT: 41004e4986SSebastian Grimberg break; // Caught by QF assembly 4207b31e0eSJeremy L Thompson } 4307b31e0eSJeremy L Thompson } 4407b31e0eSJeremy L Thompson 4507b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 4607b31e0eSJeremy L Thompson // Core code for diagonal assembly 4707b31e0eSJeremy L Thompson //------------------------------------------------------------------------------ 48cbfe683aSSebastian Grimberg extern "C" __launch_bounds__(BLOCK_SIZE) __global__ 49cbfe683aSSebastian Grimberg void LinearDiagonal(const CeedInt num_elem, const CeedScalar *identity, const CeedScalar *interp_in, const CeedScalar *grad_in, 50cbfe683aSSebastian Grimberg const CeedScalar *div_in, const CeedScalar *curl_in, const CeedScalar *interp_out, const CeedScalar *grad_out, 51cbfe683aSSebastian Grimberg const CeedScalar *div_out, const CeedScalar *curl_out, const CeedEvalMode *eval_modes_in, const CeedEvalMode *eval_modes_out, 52cbfe683aSSebastian Grimberg const CeedScalar *__restrict__ assembled_qf_array, CeedScalar *__restrict__ elem_diag_array) { 53004e4986SSebastian Grimberg const int tid = threadIdx.x; // Running with P threads 54004e4986SSebastian Grimberg 55004e4986SSebastian Grimberg if (tid >= NUM_NODES) return; 5607b31e0eSJeremy L Thompson 5707b31e0eSJeremy L Thompson // Compute the diagonal of B^T D B 5807b31e0eSJeremy L Thompson // Each element 59004e4986SSebastian Grimberg for (IndexType e = blockIdx.x * blockDim.z + threadIdx.z; e < num_elem; e += gridDim.x * blockDim.z) { 6007b31e0eSJeremy L Thompson // Each basis eval mode pair 61004e4986SSebastian Grimberg IndexType d_out = 0; 62004e4986SSebastian Grimberg CeedEvalMode eval_modes_out_prev = CEED_EVAL_NONE; 63004e4986SSebastian Grimberg 64004e4986SSebastian Grimberg for (IndexType e_out = 0; e_out < NUM_EVAL_MODES_OUT; e_out++) { 65004e4986SSebastian Grimberg IndexType d_in = 0; 66004e4986SSebastian Grimberg CeedEvalMode eval_modes_in_prev = CEED_EVAL_NONE; 67004e4986SSebastian Grimberg const CeedScalar *b_t = NULL; 68004e4986SSebastian Grimberg 69004e4986SSebastian Grimberg GetBasisPointer(&b_t, eval_modes_out[e_out], identity, interp_out, grad_out, div_out, curl_out); 70004e4986SSebastian Grimberg if (e_out == 0 || eval_modes_out[e_out] != eval_modes_out_prev) d_out = 0; 71004e4986SSebastian Grimberg else b_t = &b_t[(++d_out) * NUM_QPTS * NUM_NODES]; 72004e4986SSebastian Grimberg eval_modes_out_prev = eval_modes_out[e_out]; 73004e4986SSebastian Grimberg 74004e4986SSebastian Grimberg for (IndexType e_in = 0; e_in < NUM_EVAL_MODES_IN; e_in++) { 7507b31e0eSJeremy L Thompson const CeedScalar *b = NULL; 76004e4986SSebastian Grimberg 77004e4986SSebastian Grimberg GetBasisPointer(&b, eval_modes_in[e_in], identity, interp_in, grad_in, div_in, curl_in); 78004e4986SSebastian Grimberg if (e_in == 0 || eval_modes_in[e_in] != eval_modes_in_prev) d_in = 0; 79004e4986SSebastian Grimberg else b = &b[(++d_in) * NUM_QPTS * NUM_NODES]; 80004e4986SSebastian Grimberg eval_modes_in_prev = eval_modes_in[e_in]; 81004e4986SSebastian Grimberg 8207b31e0eSJeremy L Thompson // Each component 83004e4986SSebastian Grimberg for (IndexType comp_out = 0; comp_out < NUM_COMP; comp_out++) { 84cbfe683aSSebastian Grimberg #if USE_POINT_BLOCK 85004e4986SSebastian Grimberg // Point block diagonal 86004e4986SSebastian Grimberg for (IndexType comp_in = 0; comp_in < NUM_COMP; comp_in++) { 87004e4986SSebastian Grimberg CeedScalar e_value = 0.; 88004e4986SSebastian Grimberg 89cbfe683aSSebastian Grimberg // Each qpoint/node pair 90004e4986SSebastian Grimberg for (IndexType q = 0; q < NUM_QPTS; q++) { 91004e4986SSebastian Grimberg const CeedScalar qf_value = 92cbfe683aSSebastian Grimberg assembled_qf_array[((((e_in * NUM_COMP + comp_in) * NUM_EVAL_MODES_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) * NUM_QPTS + 93004e4986SSebastian Grimberg q]; 94004e4986SSebastian Grimberg 95004e4986SSebastian Grimberg e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid]; 9607b31e0eSJeremy L Thompson } 97004e4986SSebastian Grimberg elem_diag_array[((comp_out * NUM_COMP + comp_in) * num_elem + e) * NUM_NODES + tid] += e_value; 9807b31e0eSJeremy L Thompson } 99cbfe683aSSebastian Grimberg #else 100004e4986SSebastian Grimberg // Diagonal only 101004e4986SSebastian Grimberg CeedScalar e_value = 0.; 102004e4986SSebastian Grimberg 103cbfe683aSSebastian Grimberg // Each qpoint/node pair 104004e4986SSebastian Grimberg for (IndexType q = 0; q < NUM_QPTS; q++) { 105004e4986SSebastian Grimberg const CeedScalar qf_value = 106004e4986SSebastian Grimberg assembled_qf_array[((((e_in * NUM_COMP + comp_out) * NUM_EVAL_MODES_OUT + e_out) * NUM_COMP + comp_out) * num_elem + e) * NUM_QPTS + 107004e4986SSebastian Grimberg q]; 108004e4986SSebastian Grimberg 109004e4986SSebastian Grimberg e_value += b_t[q * NUM_NODES + tid] * qf_value * b[q * NUM_NODES + tid]; 11007b31e0eSJeremy L Thompson } 111004e4986SSebastian Grimberg elem_diag_array[(comp_out * num_elem + e) * NUM_NODES + tid] += e_value; 112cbfe683aSSebastian Grimberg #endif 11307b31e0eSJeremy L Thompson } 11407b31e0eSJeremy L Thompson } 11507b31e0eSJeremy L Thompson } 11607b31e0eSJeremy L Thompson } 11707b31e0eSJeremy L Thompson } 118