1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 /// @file 9 /// Internal header for HIP shared memory tensor product basis templates 10 #ifndef CEED_HIP_SHARED_BASIS_TENSOR_TEMPLATES_H 11 #define CEED_HIP_SHARED_BASIS_TENSOR_TEMPLATES_H 12 13 #include <ceed.h> 14 15 //------------------------------------------------------------------------------ 16 // 1D 17 //------------------------------------------------------------------------------ 18 19 //------------------------------------------------------------------------------ 20 // 1D tensor contraction x 21 //------------------------------------------------------------------------------ 22 template <int NUM_COMP, int P_1D, int Q_1D> 23 inline __device__ void ContractX1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 24 data.slice[data.t_id_x] = *U; 25 __syncthreads(); 26 *V = 0.0; 27 if (data.t_id_x < Q_1D) { 28 for (CeedInt i = 0; i < P_1D; i++) { 29 *V += B[i + data.t_id_x * P_1D] * data.slice[i]; // Contract x direction 30 } 31 } 32 __syncthreads(); 33 } 34 35 //------------------------------------------------------------------------------ 36 // 1D transpose tensor contraction x 37 //------------------------------------------------------------------------------ 38 template <int NUM_COMP, int P_1D, int Q_1D> 39 inline __device__ void ContractTransposeX1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 40 data.slice[data.t_id_x] = *U; 41 __syncthreads(); 42 *V = 0.0; 43 if (data.t_id_x < P_1D) { 44 for (CeedInt i = 0; i < Q_1D; i++) { 45 *V += B[data.t_id_x + i * P_1D] * data.slice[i]; // Contract x direction 46 } 47 } 48 __syncthreads(); 49 } 50 51 //------------------------------------------------------------------------------ 52 // 1D interpolate to quadrature points 53 //------------------------------------------------------------------------------ 54 template <int NUM_COMP, int P_1D, int Q_1D> 55 inline __device__ void Interp1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) { 56 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 57 ContractX1d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp, c_B, r_V + comp); 58 } 59 } 60 61 //------------------------------------------------------------------------------ 62 // 1D interpolate transpose 63 //------------------------------------------------------------------------------ 64 template <int NUM_COMP, int P_1D, int Q_1D> 65 inline __device__ void InterpTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 66 CeedScalar *__restrict__ r_V) { 67 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 68 ContractTransposeX1d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp, c_B, r_V + comp); 69 } 70 } 71 72 //------------------------------------------------------------------------------ 73 // 1D derivatives at quadrature points 74 //------------------------------------------------------------------------------ 75 template <int NUM_COMP, int P_1D, int Q_1D> 76 inline __device__ void Grad1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 77 CeedScalar *__restrict__ r_V) { 78 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 79 ContractX1d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp, c_G, r_V + comp); 80 } 81 } 82 83 //------------------------------------------------------------------------------ 84 // 1D derivatives transpose 85 //------------------------------------------------------------------------------ 86 template <int NUM_COMP, int P_1D, int Q_1D> 87 inline __device__ void GradTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 88 CeedScalar *__restrict__ r_V) { 89 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 90 ContractTransposeX1d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp, c_G, r_V + comp); 91 } 92 } 93 94 //------------------------------------------------------------------------------ 95 // 1D quadrature weights 96 //------------------------------------------------------------------------------ 97 template <int Q_1D> 98 inline __device__ void Weight1d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) { 99 *w = (data.t_id_x < Q_1D) ? q_weight_1d[data.t_id_x] : 0.0; 100 } 101 102 //------------------------------------------------------------------------------ 103 // 2D 104 //------------------------------------------------------------------------------ 105 106 //------------------------------------------------------------------------------ 107 // 2D tensor contraction x 108 //------------------------------------------------------------------------------ 109 template <int NUM_COMP, int P_1D, int Q_1D> 110 inline __device__ void ContractX2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 111 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 112 __syncthreads(); 113 *V = 0.0; 114 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 115 for (CeedInt i = 0; i < P_1D; i++) { 116 *V += B[i + data.t_id_x * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 117 } 118 } 119 __syncthreads(); 120 } 121 122 //------------------------------------------------------------------------------ 123 // 2D tensor contract y 124 //------------------------------------------------------------------------------ 125 template <int NUM_COMP, int P_1D, int Q_1D> 126 inline __device__ void ContractY2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 127 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 128 __syncthreads(); 129 *V = 0.0; 130 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 131 for (CeedInt i = 0; i < P_1D; i++) { 132 *V += B[i + data.t_id_y * P_1D] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 133 } 134 } 135 __syncthreads(); 136 } 137 138 //------------------------------------------------------------------------------ 139 // 2D transpose tensor contract y 140 //------------------------------------------------------------------------------ 141 template <int NUM_COMP, int P_1D, int Q_1D> 142 inline __device__ void ContractTransposeY2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 143 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 144 __syncthreads(); 145 *V = 0.0; 146 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 147 for (CeedInt i = 0; i < Q_1D; i++) { 148 *V += B[data.t_id_y + i * P_1D] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 149 } 150 } 151 __syncthreads(); 152 } 153 154 //------------------------------------------------------------------------------ 155 // 2D transpose tensor contract x 156 //------------------------------------------------------------------------------ 157 template <int NUM_COMP, int P_1D, int Q_1D> 158 inline __device__ void ContractTransposeX2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 159 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 160 __syncthreads(); 161 *V = 0.0; 162 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 163 for (CeedInt i = 0; i < Q_1D; i++) { 164 *V += B[data.t_id_x + i * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 165 } 166 } 167 __syncthreads(); 168 } 169 170 //------------------------------------------------------------------------------ 171 // 2D transpose tensor contract and add x 172 //------------------------------------------------------------------------------ 173 template <int NUM_COMP, int P_1D, int Q_1D> 174 inline __device__ void ContractTransposeAddX2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 175 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 176 __syncthreads(); 177 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 178 for (CeedInt i = 0; i < Q_1D; i++) { 179 *V += B[data.t_id_x + i * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 180 } 181 } 182 __syncthreads(); 183 } 184 185 //------------------------------------------------------------------------------ 186 // 2D interpolate to quadrature points 187 //------------------------------------------------------------------------------ 188 template <int NUM_COMP, int P_1D, int Q_1D> 189 inline __device__ void InterpTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) { 190 CeedScalar r_t[1]; 191 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 192 ContractX2d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp, c_B, r_t); 193 ContractY2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_B, r_V + comp); 194 } 195 } 196 197 //------------------------------------------------------------------------------ 198 // 2D interpolate transpose 199 //------------------------------------------------------------------------------ 200 template <int NUM_COMP, int P_1D, int Q_1D> 201 inline __device__ void InterpTransposeTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 202 CeedScalar *__restrict__ r_V) { 203 CeedScalar r_t[1]; 204 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 205 ContractTransposeY2d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp, c_B, r_t); 206 ContractTransposeX2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_B, r_V + comp); 207 } 208 } 209 210 //------------------------------------------------------------------------------ 211 // 2D derivatives at quadrature points 212 //------------------------------------------------------------------------------ 213 template <int NUM_COMP, int P_1D, int Q_1D> 214 inline __device__ void GradTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 215 CeedScalar *__restrict__ r_V) { 216 CeedScalar r_t[1]; 217 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 218 ContractX2d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp, c_G, r_t); 219 ContractY2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_B, r_V + comp + 0 * NUM_COMP); 220 ContractX2d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp, c_B, r_t); 221 ContractY2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_G, r_V + comp + 1 * NUM_COMP); 222 } 223 } 224 225 //------------------------------------------------------------------------------ 226 // 2D derivatives transpose 227 //------------------------------------------------------------------------------ 228 template <int NUM_COMP, int P_1D, int Q_1D> 229 inline __device__ void GradTransposeTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 230 CeedScalar *__restrict__ r_V) { 231 CeedScalar r_t[1]; 232 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 233 ContractTransposeY2d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp + 0 * NUM_COMP, c_B, r_t); 234 ContractTransposeX2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_G, r_V + comp); 235 ContractTransposeY2d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp + 1 * NUM_COMP, c_G, r_t); 236 ContractTransposeAddX2d<NUM_COMP, P_1D, Q_1D>(data, r_t, c_B, r_V + comp); 237 } 238 } 239 240 //------------------------------------------------------------------------------ 241 // 2D quadrature weights 242 //------------------------------------------------------------------------------ 243 template <int Q_1D> 244 inline __device__ void WeightTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) { 245 *w = (data.t_id_x < Q_1D && data.t_id_y < Q_1D) ? q_weight_1d[data.t_id_x] * q_weight_1d[data.t_id_y] : 0.0; 246 } 247 248 //------------------------------------------------------------------------------ 249 // 3D 250 //------------------------------------------------------------------------------ 251 252 //------------------------------------------------------------------------------ 253 // 3D tensor contract x 254 //------------------------------------------------------------------------------ 255 template <int NUM_COMP, int P_1D, int Q_1D> 256 inline __device__ void ContractX3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 257 CeedScalar r_B[P_1D]; 258 for (CeedInt i = 0; i < P_1D; i++) { 259 r_B[i] = B[i + data.t_id_x * P_1D]; 260 } 261 262 for (CeedInt k = 0; k < P_1D; k++) { 263 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 264 __syncthreads(); 265 V[k] = 0.0; 266 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 267 for (CeedInt i = 0; i < P_1D; i++) { 268 V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 269 } 270 } 271 __syncthreads(); 272 } 273 } 274 275 //------------------------------------------------------------------------------ 276 // 3D tensor contract y 277 //------------------------------------------------------------------------------ 278 template <int NUM_COMP, int P_1D, int Q_1D> 279 inline __device__ void ContractY3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 280 CeedScalar r_B[P_1D]; 281 for (CeedInt i = 0; i < P_1D; i++) { 282 r_B[i] = B[i + data.t_id_y * P_1D]; 283 } 284 285 for (CeedInt k = 0; k < P_1D; k++) { 286 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 287 __syncthreads(); 288 V[k] = 0.0; 289 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 290 for (CeedInt i = 0; i < P_1D; i++) { 291 V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 292 } 293 } 294 __syncthreads(); 295 } 296 } 297 298 //------------------------------------------------------------------------------ 299 // 3D tensor contract z 300 //------------------------------------------------------------------------------ 301 template <int NUM_COMP, int P_1D, int Q_1D> 302 inline __device__ void ContractZ3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 303 for (CeedInt k = 0; k < Q_1D; k++) { 304 V[k] = 0.0; 305 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 306 for (CeedInt i = 0; i < P_1D; i++) { 307 V[k] += B[i + k * P_1D] * U[i]; // Contract z direction 308 } 309 } 310 } 311 } 312 313 //------------------------------------------------------------------------------ 314 // 3D transpose tensor contract z 315 //------------------------------------------------------------------------------ 316 template <int NUM_COMP, int P_1D, int Q_1D> 317 inline __device__ void ContractTransposeZ3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 318 for (CeedInt k = 0; k < P_1D; k++) { 319 V[k] = 0.0; 320 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 321 for (CeedInt i = 0; i < Q_1D; i++) { 322 V[k] += B[k + i * P_1D] * U[i]; // Contract z direction 323 } 324 } 325 } 326 } 327 328 //------------------------------------------------------------------------------ 329 // 3D transpose tensor contract y 330 //------------------------------------------------------------------------------ 331 template <int NUM_COMP, int P_1D, int Q_1D> 332 inline __device__ void ContractTransposeY3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 333 CeedScalar r_B[Q_1D]; 334 for (CeedInt i = 0; i < Q_1D; i++) { 335 r_B[i] = B[data.t_id_y + i * P_1D]; 336 } 337 338 for (CeedInt k = 0; k < P_1D; k++) { 339 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 340 __syncthreads(); 341 V[k] = 0.0; 342 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 343 for (CeedInt i = 0; i < Q_1D; i++) { 344 V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 345 } 346 } 347 __syncthreads(); 348 } 349 } 350 351 //------------------------------------------------------------------------------ 352 // 3D transpose tensor contract y 353 //------------------------------------------------------------------------------ 354 template <int NUM_COMP, int P_1D, int Q_1D> 355 inline __device__ void ContractTransposeAddY3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 356 CeedScalar r_B[Q_1D]; 357 for (CeedInt i = 0; i < Q_1D; i++) { 358 r_B[i] = B[data.t_id_y + i * P_1D]; 359 } 360 361 for (CeedInt k = 0; k < P_1D; k++) { 362 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 363 __syncthreads(); 364 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 365 for (CeedInt i = 0; i < Q_1D; i++) { 366 V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 367 } 368 } 369 __syncthreads(); 370 } 371 } 372 373 //------------------------------------------------------------------------------ 374 // 3D transpose tensor contract x 375 //------------------------------------------------------------------------------ 376 template <int NUM_COMP, int P_1D, int Q_1D> 377 inline __device__ void ContractTransposeX3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 378 CeedScalar r_B[Q_1D]; 379 for (CeedInt i = 0; i < Q_1D; i++) { 380 r_B[i] = B[data.t_id_x + i * P_1D]; 381 } 382 383 for (CeedInt k = 0; k < P_1D; k++) { 384 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 385 __syncthreads(); 386 V[k] = 0.0; 387 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 388 for (CeedInt i = 0; i < Q_1D; i++) { 389 V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 390 } 391 } 392 __syncthreads(); 393 } 394 } 395 396 //------------------------------------------------------------------------------ 397 // 3D transpose tensor contract add x 398 //------------------------------------------------------------------------------ 399 template <int NUM_COMP, int P_1D, int Q_1D> 400 inline __device__ void ContractTransposeAddX3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 401 CeedScalar r_B[Q_1D]; 402 for (CeedInt i = 0; i < Q_1D; i++) { 403 r_B[i] = B[data.t_id_x + i * P_1D]; 404 } 405 406 for (CeedInt k = 0; k < P_1D; k++) { 407 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 408 __syncthreads(); 409 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 410 for (CeedInt i = 0; i < Q_1D; i++) { 411 V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 412 } 413 } 414 __syncthreads(); 415 } 416 } 417 418 //------------------------------------------------------------------------------ 419 // 3D interpolate to quadrature points 420 //------------------------------------------------------------------------------ 421 template <int NUM_COMP, int P_1D, int Q_1D> 422 inline __device__ void InterpTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) { 423 CeedScalar r_t1[T_1D]; 424 CeedScalar r_t2[T_1D]; 425 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 426 ContractX3d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp * P_1D, c_B, r_t1); 427 ContractY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 428 ContractZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, r_V + comp * Q_1D); 429 } 430 } 431 432 //------------------------------------------------------------------------------ 433 // 3D interpolate transpose 434 //------------------------------------------------------------------------------ 435 template <int NUM_COMP, int P_1D, int Q_1D> 436 inline __device__ void InterpTransposeTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 437 CeedScalar *__restrict__ r_V) { 438 CeedScalar r_t1[T_1D]; 439 CeedScalar r_t2[T_1D]; 440 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 441 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp * Q_1D, c_B, r_t1); 442 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 443 ContractTransposeX3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, r_V + comp * P_1D); 444 } 445 } 446 447 //------------------------------------------------------------------------------ 448 // 3D derivatives at quadrature points 449 //------------------------------------------------------------------------------ 450 template <int NUM_COMP, int P_1D, int Q_1D> 451 inline __device__ void GradTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 452 CeedScalar *__restrict__ r_V) { 453 CeedScalar r_t1[T_1D]; 454 CeedScalar r_t2[T_1D]; 455 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 456 ContractX3d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp * P_1D, c_G, r_t1); 457 ContractY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 458 ContractZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, r_V + comp * Q_1D + 0 * NUM_COMP * Q_1D); 459 ContractX3d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp * P_1D, c_B, r_t1); 460 ContractY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_G, r_t2); 461 ContractZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, r_V + comp * Q_1D + 1 * NUM_COMP * Q_1D); 462 ContractX3d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp * P_1D, c_B, r_t1); 463 ContractY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 464 ContractZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_G, r_V + comp * Q_1D + 2 * NUM_COMP * Q_1D); 465 } 466 } 467 468 //------------------------------------------------------------------------------ 469 // 3D derivatives transpose 470 //------------------------------------------------------------------------------ 471 template <int NUM_COMP, int P_1D, int Q_1D> 472 inline __device__ void GradTransposeTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 473 CeedScalar *__restrict__ r_V) { 474 CeedScalar r_t1[T_1D]; 475 CeedScalar r_t2[T_1D]; 476 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 477 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp * Q_1D + 0 * NUM_COMP * Q_1D, c_B, r_t1); 478 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 479 ContractTransposeX3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_G, r_V + comp * P_1D); 480 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp * Q_1D + 1 * NUM_COMP * Q_1D, c_B, r_t1); 481 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_G, r_t2); 482 ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, r_V + comp * P_1D); 483 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp * Q_1D + 2 * NUM_COMP * Q_1D, c_G, r_t1); 484 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 485 ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, r_V + comp * P_1D); 486 } 487 } 488 489 //------------------------------------------------------------------------------ 490 // 3D derivatives at quadrature points 491 //------------------------------------------------------------------------------ 492 template <int NUM_COMP, int P_1D, int Q_1D> 493 inline __device__ void GradTensorCollocated3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 494 CeedScalar *__restrict__ r_V) { 495 CeedScalar r_t1[T_1D]; 496 CeedScalar r_t2[T_1D]; 497 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 498 ContractX3d<NUM_COMP, P_1D, Q_1D>(data, r_U + comp * P_1D, c_B, r_t1); 499 ContractY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 500 ContractZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, r_t1); 501 ContractX3d<NUM_COMP, Q_1D, Q_1D>(data, r_t1, c_G, r_V + comp * Q_1D + 0 * NUM_COMP * Q_1D); 502 ContractY3d<NUM_COMP, Q_1D, Q_1D>(data, r_t1, c_G, r_V + comp * Q_1D + 1 * NUM_COMP * Q_1D); 503 ContractZ3d<NUM_COMP, Q_1D, Q_1D>(data, r_t1, c_G, r_V + comp * Q_1D + 2 * NUM_COMP * Q_1D); 504 } 505 } 506 507 //------------------------------------------------------------------------------ 508 // 3D derivatives transpose 509 //------------------------------------------------------------------------------ 510 template <int NUM_COMP, int P_1D, int Q_1D> 511 inline __device__ void GradTransposeTensorCollocated3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 512 const CeedScalar *c_G, CeedScalar *__restrict__ r_V) { 513 CeedScalar r_t1[T_1D]; 514 CeedScalar r_t2[T_1D]; 515 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 516 ContractTransposeZ3d<NUM_COMP, Q_1D, Q_1D>(data, r_U + comp * Q_1D + 2 * NUM_COMP * Q_1D, c_G, r_t2); 517 ContractTransposeAddY3d<NUM_COMP, Q_1D, Q_1D>(data, r_U + comp * Q_1D + 1 * NUM_COMP * Q_1D, c_G, r_t2); 518 ContractTransposeAddX3d<NUM_COMP, Q_1D, Q_1D>(data, r_U + comp * Q_1D + 0 * NUM_COMP * Q_1D, c_G, r_t2); 519 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, r_t1); 520 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D>(data, r_t1, c_B, r_t2); 521 ContractTransposeX3d<NUM_COMP, P_1D, Q_1D>(data, r_t2, c_B, r_V + comp * P_1D); 522 } 523 } 524 525 //------------------------------------------------------------------------------ 526 // 3D quadrature weights 527 //------------------------------------------------------------------------------ 528 template <int Q_1D> 529 inline __device__ void WeightTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) { 530 const bool quad = (data.t_id_x < Q_1D && data.t_id_y < Q_1D); 531 const CeedScalar pw = quad ? q_weight_1d[data.t_id_x] * q_weight_1d[data.t_id_y] : 0.0; 532 for (CeedInt q = 0; q < Q_1D; q++) { 533 w[q] = quad ? pw * q_weight_1d[q] : 0.0; 534 } 535 } 536 537 //------------------------------------------------------------------------------ 538 539 #endif // CEED_HIP_SHARED_BASIS_TENSOR_TEMPLATES_H 540