1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 /// @file 9 /// Internal header for HIP shared memory tensor product basis templates 10 11 #include <ceed/types.h> 12 13 //------------------------------------------------------------------------------ 14 // 1D 15 //------------------------------------------------------------------------------ 16 17 //------------------------------------------------------------------------------ 18 // 1D tensor contraction x 19 //------------------------------------------------------------------------------ 20 template <int NUM_COMP, int P_1D, int Q_1D> 21 inline __device__ void ContractX1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 22 data.slice[data.t_id_x] = *U; 23 __syncthreads(); 24 *V = 0.0; 25 if (data.t_id_x < Q_1D) { 26 for (CeedInt i = 0; i < P_1D; i++) { 27 *V += B[i + data.t_id_x * P_1D] * data.slice[i]; // Contract x direction 28 } 29 } 30 __syncthreads(); 31 } 32 33 //------------------------------------------------------------------------------ 34 // 1D transpose tensor contraction x 35 //------------------------------------------------------------------------------ 36 template <int NUM_COMP, int P_1D, int Q_1D> 37 inline __device__ void ContractTransposeX1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 38 data.slice[data.t_id_x] = *U; 39 __syncthreads(); 40 *V = 0.0; 41 if (data.t_id_x < P_1D) { 42 for (CeedInt i = 0; i < Q_1D; i++) { 43 *V += B[data.t_id_x + i * P_1D] * data.slice[i]; // Contract x direction 44 } 45 } 46 __syncthreads(); 47 } 48 49 //------------------------------------------------------------------------------ 50 // 1D interpolate to quadrature points 51 //------------------------------------------------------------------------------ 52 template <int NUM_COMP, int P_1D, int Q_1D> 53 inline __device__ void Interp1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) { 54 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 55 ContractX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, &r_V[comp]); 56 } 57 } 58 59 //------------------------------------------------------------------------------ 60 // 1D interpolate transpose 61 //------------------------------------------------------------------------------ 62 template <int NUM_COMP, int P_1D, int Q_1D> 63 inline __device__ void InterpTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 64 CeedScalar *__restrict__ r_V) { 65 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 66 ContractTransposeX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, &r_V[comp]); 67 } 68 } 69 70 //------------------------------------------------------------------------------ 71 // 1D derivatives at quadrature points 72 //------------------------------------------------------------------------------ 73 template <int NUM_COMP, int P_1D, int Q_1D> 74 inline __device__ void Grad1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 75 CeedScalar *__restrict__ r_V) { 76 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 77 ContractX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_G, &r_V[comp]); 78 } 79 } 80 81 //------------------------------------------------------------------------------ 82 // 1D derivatives transpose 83 //------------------------------------------------------------------------------ 84 template <int NUM_COMP, int P_1D, int Q_1D> 85 inline __device__ void GradTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 86 CeedScalar *__restrict__ r_V) { 87 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 88 ContractTransposeX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_G, &r_V[comp]); 89 } 90 } 91 92 //------------------------------------------------------------------------------ 93 // 1D quadrature weights 94 //------------------------------------------------------------------------------ 95 template <int Q_1D> 96 inline __device__ void Weight1d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) { 97 *w = (data.t_id_x < Q_1D) ? q_weight_1d[data.t_id_x] : 0.0; 98 } 99 100 //------------------------------------------------------------------------------ 101 // 2D 102 //------------------------------------------------------------------------------ 103 104 //------------------------------------------------------------------------------ 105 // 2D tensor contraction x 106 //------------------------------------------------------------------------------ 107 template <int NUM_COMP, int P_1D, int Q_1D> 108 inline __device__ void ContractX2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 109 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 110 __syncthreads(); 111 *V = 0.0; 112 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 113 for (CeedInt i = 0; i < P_1D; i++) { 114 *V += B[i + data.t_id_x * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 115 } 116 } 117 __syncthreads(); 118 } 119 120 //------------------------------------------------------------------------------ 121 // 2D tensor contract y 122 //------------------------------------------------------------------------------ 123 template <int NUM_COMP, int P_1D, int Q_1D> 124 inline __device__ void ContractY2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 125 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 126 __syncthreads(); 127 *V = 0.0; 128 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 129 for (CeedInt i = 0; i < P_1D; i++) { 130 *V += B[i + data.t_id_y * P_1D] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 131 } 132 } 133 __syncthreads(); 134 } 135 136 //------------------------------------------------------------------------------ 137 // 2D transpose tensor contract y 138 //------------------------------------------------------------------------------ 139 template <int NUM_COMP, int P_1D, int Q_1D> 140 inline __device__ void ContractTransposeY2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 141 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 142 __syncthreads(); 143 *V = 0.0; 144 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 145 for (CeedInt i = 0; i < Q_1D; i++) { 146 *V += B[data.t_id_y + i * P_1D] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 147 } 148 } 149 __syncthreads(); 150 } 151 152 //------------------------------------------------------------------------------ 153 // 2D transpose tensor contract x 154 //------------------------------------------------------------------------------ 155 template <int NUM_COMP, int P_1D, int Q_1D> 156 inline __device__ void ContractTransposeX2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 157 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 158 __syncthreads(); 159 *V = 0.0; 160 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 161 for (CeedInt i = 0; i < Q_1D; i++) { 162 *V += B[data.t_id_x + i * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 163 } 164 } 165 __syncthreads(); 166 } 167 168 //------------------------------------------------------------------------------ 169 // 2D transpose tensor contract and add x 170 //------------------------------------------------------------------------------ 171 template <int NUM_COMP, int P_1D, int Q_1D> 172 inline __device__ void ContractTransposeAddX2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 173 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 174 __syncthreads(); 175 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 176 for (CeedInt i = 0; i < Q_1D; i++) { 177 *V += B[data.t_id_x + i * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 178 } 179 } 180 __syncthreads(); 181 } 182 183 //------------------------------------------------------------------------------ 184 // 2D interpolate to quadrature points 185 //------------------------------------------------------------------------------ 186 template <int NUM_COMP, int P_1D, int Q_1D> 187 inline __device__ void InterpTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) { 188 CeedScalar r_t[1]; 189 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 190 ContractX2d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, r_t); 191 ContractY2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_B, &r_V[comp]); 192 } 193 } 194 195 //------------------------------------------------------------------------------ 196 // 2D interpolate transpose 197 //------------------------------------------------------------------------------ 198 template <int NUM_COMP, int P_1D, int Q_1D> 199 inline __device__ void InterpTransposeTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 200 CeedScalar *__restrict__ r_V) { 201 CeedScalar r_t[1]; 202 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 203 ContractTransposeY2d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, r_t); 204 ContractTransposeX2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_B, &r_V[comp]); 205 } 206 } 207 208 //------------------------------------------------------------------------------ 209 // 2D derivatives at quadrature points 210 //------------------------------------------------------------------------------ 211 template <int NUM_COMP, int P_1D, int Q_1D> 212 inline __device__ void GradTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 213 CeedScalar *__restrict__ r_V) { 214 CeedScalar r_t[1]; 215 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 216 ContractX2d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_G, r_t); 217 ContractY2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_B, &r_V[comp + 0 * NUM_COMP]); 218 ContractX2d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, r_t); 219 ContractY2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_G, &r_V[comp + 1 * NUM_COMP]); 220 } 221 } 222 223 //------------------------------------------------------------------------------ 224 // 2D derivatives transpose 225 //------------------------------------------------------------------------------ 226 template <int NUM_COMP, int P_1D, int Q_1D> 227 inline __device__ void GradTransposeTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 228 CeedScalar *__restrict__ r_V) { 229 CeedScalar r_t[1]; 230 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 231 ContractTransposeY2d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp + 0 * NUM_COMP], c_B, r_t); 232 ContractTransposeX2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_G, &r_V[comp]); 233 ContractTransposeY2d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp + 1 * NUM_COMP], c_G, r_t); 234 ContractTransposeAddX2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_B, &r_V[comp]); 235 } 236 } 237 238 //------------------------------------------------------------------------------ 239 // 2D quadrature weights 240 //------------------------------------------------------------------------------ 241 template <int Q_1D> 242 inline __device__ void WeightTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) { 243 *w = (data.t_id_x < Q_1D && data.t_id_y < Q_1D) ? q_weight_1d[data.t_id_x] * q_weight_1d[data.t_id_y] : 0.0; 244 } 245 246 //------------------------------------------------------------------------------ 247 // 3D 248 //------------------------------------------------------------------------------ 249 250 //------------------------------------------------------------------------------ 251 // 3D tensor contract x 252 //------------------------------------------------------------------------------ 253 template <int NUM_COMP, int P_1D, int Q_1D> 254 inline __device__ void ContractX3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 255 CeedScalar r_B[P_1D]; 256 for (CeedInt i = 0; i < P_1D; i++) { 257 r_B[i] = B[i + data.t_id_x * P_1D]; 258 } 259 260 for (CeedInt k = 0; k < P_1D; k++) { 261 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 262 __syncthreads(); 263 V[k] = 0.0; 264 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 265 for (CeedInt i = 0; i < P_1D; i++) { 266 V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 267 } 268 } 269 __syncthreads(); 270 } 271 } 272 273 //------------------------------------------------------------------------------ 274 // 3D tensor contract y 275 //------------------------------------------------------------------------------ 276 template <int NUM_COMP, int P_1D, int Q_1D> 277 inline __device__ void ContractY3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 278 CeedScalar r_B[P_1D]; 279 for (CeedInt i = 0; i < P_1D; i++) { 280 r_B[i] = B[i + data.t_id_y * P_1D]; 281 } 282 283 for (CeedInt k = 0; k < P_1D; k++) { 284 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 285 __syncthreads(); 286 V[k] = 0.0; 287 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 288 for (CeedInt i = 0; i < P_1D; i++) { 289 V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 290 } 291 } 292 __syncthreads(); 293 } 294 } 295 296 //------------------------------------------------------------------------------ 297 // 3D tensor contract z 298 //------------------------------------------------------------------------------ 299 template <int NUM_COMP, int P_1D, int Q_1D> 300 inline __device__ void ContractZ3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 301 for (CeedInt k = 0; k < Q_1D; k++) { 302 V[k] = 0.0; 303 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 304 for (CeedInt i = 0; i < P_1D; i++) { 305 V[k] += B[i + k * P_1D] * U[i]; // Contract z direction 306 } 307 } 308 } 309 } 310 311 //------------------------------------------------------------------------------ 312 // 3D transpose tensor contract z 313 //------------------------------------------------------------------------------ 314 template <int NUM_COMP, int P_1D, int Q_1D> 315 inline __device__ void ContractTransposeZ3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 316 for (CeedInt k = 0; k < P_1D; k++) { 317 V[k] = 0.0; 318 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 319 for (CeedInt i = 0; i < Q_1D; i++) { 320 V[k] += B[k + i * P_1D] * U[i]; // Contract z direction 321 } 322 } 323 } 324 } 325 326 //------------------------------------------------------------------------------ 327 // 3D transpose tensor contract y 328 //------------------------------------------------------------------------------ 329 template <int NUM_COMP, int P_1D, int Q_1D> 330 inline __device__ void ContractTransposeY3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 331 CeedScalar r_B[Q_1D]; 332 for (CeedInt i = 0; i < Q_1D; i++) { 333 r_B[i] = B[data.t_id_y + i * P_1D]; 334 } 335 336 for (CeedInt k = 0; k < P_1D; k++) { 337 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 338 __syncthreads(); 339 V[k] = 0.0; 340 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 341 for (CeedInt i = 0; i < Q_1D; i++) { 342 V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 343 } 344 } 345 __syncthreads(); 346 } 347 } 348 349 //------------------------------------------------------------------------------ 350 // 3D transpose tensor contract y 351 //------------------------------------------------------------------------------ 352 template <int NUM_COMP, int P_1D, int Q_1D> 353 inline __device__ void ContractTransposeAddY3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 354 CeedScalar r_B[Q_1D]; 355 for (CeedInt i = 0; i < Q_1D; i++) { 356 r_B[i] = B[data.t_id_y + i * P_1D]; 357 } 358 359 for (CeedInt k = 0; k < P_1D; k++) { 360 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 361 __syncthreads(); 362 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 363 for (CeedInt i = 0; i < Q_1D; i++) { 364 V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 365 } 366 } 367 __syncthreads(); 368 } 369 } 370 371 //------------------------------------------------------------------------------ 372 // 3D transpose tensor contract x 373 //------------------------------------------------------------------------------ 374 template <int NUM_COMP, int P_1D, int Q_1D> 375 inline __device__ void ContractTransposeX3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 376 CeedScalar r_B[Q_1D]; 377 for (CeedInt i = 0; i < Q_1D; i++) { 378 r_B[i] = B[data.t_id_x + i * P_1D]; 379 } 380 381 for (CeedInt k = 0; k < P_1D; k++) { 382 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 383 __syncthreads(); 384 V[k] = 0.0; 385 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 386 for (CeedInt i = 0; i < Q_1D; i++) { 387 V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 388 } 389 } 390 __syncthreads(); 391 } 392 } 393 394 //------------------------------------------------------------------------------ 395 // 3D transpose tensor contract add x 396 //------------------------------------------------------------------------------ 397 template <int NUM_COMP, int P_1D, int Q_1D> 398 inline __device__ void ContractTransposeAddX3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 399 CeedScalar r_B[Q_1D]; 400 for (CeedInt i = 0; i < Q_1D; i++) { 401 r_B[i] = B[data.t_id_x + i * P_1D]; 402 } 403 404 for (CeedInt k = 0; k < P_1D; k++) { 405 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 406 __syncthreads(); 407 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 408 for (CeedInt i = 0; i < Q_1D; i++) { 409 V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 410 } 411 } 412 __syncthreads(); 413 } 414 } 415 416 //------------------------------------------------------------------------------ 417 // 3D interpolate to quadrature points 418 //------------------------------------------------------------------------------ 419 template <int NUM_COMP, int P_1D, int Q_1D> 420 inline __device__ void InterpTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) { 421 CeedScalar r_t1[T_1D]; 422 CeedScalar r_t2[T_1D]; 423 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 424 ContractX3d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp * P_1D], c_B, r_t1); 425 ContractY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 426 ContractZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, &r_V[comp * Q_1D]); 427 } 428 } 429 430 //------------------------------------------------------------------------------ 431 // 3D interpolate transpose 432 //------------------------------------------------------------------------------ 433 template <int NUM_COMP, int P_1D, int Q_1D> 434 inline __device__ void InterpTransposeTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 435 CeedScalar *__restrict__ r_V) { 436 CeedScalar r_t1[T_1D]; 437 CeedScalar r_t2[T_1D]; 438 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 439 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp * Q_1D], c_B, r_t1); 440 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 441 ContractTransposeX3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); 442 } 443 } 444 445 //------------------------------------------------------------------------------ 446 // 3D derivatives at quadrature points 447 //------------------------------------------------------------------------------ 448 template <int NUM_COMP, int P_1D, int Q_1D> 449 inline __device__ void GradTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 450 CeedScalar *__restrict__ r_V) { 451 CeedScalar r_t1[T_1D]; 452 CeedScalar r_t2[T_1D]; 453 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 454 ContractX3d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp * P_1D], c_G, r_t1); 455 ContractY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 456 ContractZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, &r_V[comp * Q_1D + 0 * NUM_COMP * Q_1D]); 457 ContractX3d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp * P_1D], c_B, r_t1); 458 ContractY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_G, r_t2); 459 ContractZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, &r_V[comp * Q_1D + 1 * NUM_COMP * Q_1D]); 460 ContractX3d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp * P_1D], c_B, r_t1); 461 ContractY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 462 ContractZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_G, &r_V[comp * Q_1D + 2 * NUM_COMP * Q_1D]); 463 } 464 } 465 466 //------------------------------------------------------------------------------ 467 // 3D derivatives transpose 468 //------------------------------------------------------------------------------ 469 template <int NUM_COMP, int P_1D, int Q_1D> 470 inline __device__ void GradTransposeTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 471 CeedScalar *__restrict__ r_V) { 472 CeedScalar r_t1[T_1D]; 473 CeedScalar r_t2[T_1D]; 474 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 475 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp * Q_1D + 0 * NUM_COMP * Q_1D], c_B, r_t1); 476 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 477 ContractTransposeX3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_G, &r_V[comp * P_1D]); 478 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp * Q_1D + 1 * NUM_COMP * Q_1D], c_B, r_t1); 479 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_G, r_t2); 480 ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); 481 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp * Q_1D + 2 * NUM_COMP * Q_1D], c_G, r_t1); 482 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 483 ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); 484 } 485 } 486 487 //------------------------------------------------------------------------------ 488 // 3D derivatives at quadrature points 489 //------------------------------------------------------------------------------ 490 template <int NUM_COMP, int P_1D, int Q_1D> 491 inline __device__ void GradTensorCollocated3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 492 CeedScalar *__restrict__ r_V) { 493 CeedScalar r_t1[T_1D]; 494 CeedScalar r_t2[T_1D]; 495 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 496 ContractX3d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp * P_1D], c_B, r_t1); 497 ContractY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 498 ContractZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, r_t1); 499 ContractX3d<NUM_COMP, Q_1D, Q_1D>(data, r_t1, c_G, &r_V[comp * Q_1D + 0 * NUM_COMP * Q_1D]); 500 ContractY3d<NUM_COMP, Q_1D, Q_1D>(data, r_t1, c_G, &r_V[comp * Q_1D + 1 * NUM_COMP * Q_1D]); 501 ContractZ3d<NUM_COMP, Q_1D, Q_1D>(data, r_t1, c_G, &r_V[comp * Q_1D + 2 * NUM_COMP * Q_1D]); 502 } 503 } 504 505 //------------------------------------------------------------------------------ 506 // 3D derivatives transpose 507 //------------------------------------------------------------------------------ 508 template <int NUM_COMP, int P_1D, int Q_1D> 509 inline __device__ void GradTransposeTensorCollocated3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 510 const CeedScalar *c_G, CeedScalar *__restrict__ r_V) { 511 CeedScalar r_t1[T_1D]; 512 CeedScalar r_t2[T_1D]; 513 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 514 ContractTransposeZ3d<NUM_COMP, Q_1D, Q_1D>(data, &r_U[comp * Q_1D + 2 * NUM_COMP * Q_1D], c_G, r_t2); 515 ContractTransposeAddY3d<NUM_COMP, Q_1D, Q_1D>(data, &r_U[comp * Q_1D + 1 * NUM_COMP * Q_1D], c_G, r_t2); 516 ContractTransposeAddX3d<NUM_COMP, Q_1D, Q_1D>(data, &r_U[comp * Q_1D + 0 * NUM_COMP * Q_1D], c_G, r_t2); 517 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, r_t1); 518 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 519 ContractTransposeX3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); 520 } 521 } 522 523 //------------------------------------------------------------------------------ 524 // 3D quadrature weights 525 //------------------------------------------------------------------------------ 526 template <int Q_1D> 527 inline __device__ void WeightTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) { 528 const bool quad = (data.t_id_x < Q_1D && data.t_id_y < Q_1D); 529 const CeedScalar pw = quad ? q_weight_1d[data.t_id_x] * q_weight_1d[data.t_id_y] : 0.0; 530 for (CeedInt q = 0; q < Q_1D; q++) { 531 w[q] = quad ? pw * q_weight_1d[q] : 0.0; 532 } 533 } 534