ceed-hip-shared-basis.c - OpenGrok cross reference for /libCEED/backends/hip-shared/ceed-hip-shared-basis.c

Lines Matching refs:thread_1d
44   const CeedInt thread_1d = CeedIntMax(P_1d, Q_1d);  in ComputeBasisThreadBlockSizes()  local
59       CeedInt required = thread_1d * thread_1d;  in ComputeBasisThreadBlockSizes()
73       CeedInt required = thread_1d * thread_1d;  in ComputeBasisThreadBlockSizes()
124       CeedInt thread_1d     = CeedIntMax(Q_1d, P_1d);  in CeedBasisApplyTensorCore_Hip_shared()  local
128         CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64;  in CeedBasisApplyTensorCore_Hip_shared()
131         CeedInt shared_mem      = elems_per_block * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyTensorCore_Hip_shared()
134 …d_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d, 1,  in CeedBasisApplyTensorCore_Hip_shared()
137 …CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, NULL, grid, thread_1d, 1, elems_per…  in CeedBasisApplyTensorCore_Hip_shared()
141         const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1);  in CeedBasisApplyTensorCore_Hip_shared()
143 …      CeedInt       shared_mem      = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyTensorCore_Hip_shared()
146 …ared_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d,  in CeedBasisApplyTensorCore_Hip_shared()
147 …                                             thread_1d, elems_per_block, shared_mem, interp_args));  in CeedBasisApplyTensorCore_Hip_shared()
149 …ackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_…  in CeedBasisApplyTensorCore_Hip_shared()
152         const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1);  in CeedBasisApplyTensorCore_Hip_shared()
154 …      CeedInt       shared_mem      = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyTensorCore_Hip_shared()
157 …ared_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d,  in CeedBasisApplyTensorCore_Hip_shared()
158 …                                             thread_1d, elems_per_block, shared_mem, interp_args));  in CeedBasisApplyTensorCore_Hip_shared()
160 …ackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_…  in CeedBasisApplyTensorCore_Hip_shared()
171       CeedInt     thread_1d = CeedIntMax(Q_1d, P_1d);  in CeedBasisApplyTensorCore_Hip_shared()  local
180         CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64;  in CeedBasisApplyTensorCore_Hip_shared()
183         CeedInt shared_mem      = elems_per_block * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyTensorCore_Hip_shared()
186 …hared_Hip(ceed, apply_add ? data->GradTransposeAdd : data->GradTranspose, NULL, grid, thread_1d, 1,  in CeedBasisApplyTensorCore_Hip_shared()
189 …CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Grad, NULL, grid, thread_1d, 1, elems_per_b…  in CeedBasisApplyTensorCore_Hip_shared()
193         const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1);  in CeedBasisApplyTensorCore_Hip_shared()
195 …      CeedInt       shared_mem      = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyTensorCore_Hip_shared()
198 …p(ceed, apply_add ? data->GradTransposeAdd : data->GradTranspose, NULL, grid, thread_1d, thread_1d,  in CeedBasisApplyTensorCore_Hip_shared()
201 …lBackend(CeedRunKernelDimShared_Hip(ceed, data->Grad, NULL, grid, thread_1d, thread_1d, elems_per_…  in CeedBasisApplyTensorCore_Hip_shared()
204         const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1);  in CeedBasisApplyTensorCore_Hip_shared()
206 …      CeedInt       shared_mem      = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyTensorCore_Hip_shared()
209 …p(ceed, apply_add ? data->GradTransposeAdd : data->GradTranspose, NULL, grid, thread_1d, thread_1d,  in CeedBasisApplyTensorCore_Hip_shared()
212 …lBackend(CeedRunKernelDimShared_Hip(ceed, data->Grad, NULL, grid, thread_1d, thread_1d, elems_per_…  in CeedBasisApplyTensorCore_Hip_shared()
387       CeedInt thread_1d     = CeedIntMax(Q_1d, P_1d);  in CeedBasisApplyAtPointsCore_Hip_shared()  local
391         CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64;  in CeedBasisApplyAtPointsCore_Hip_shared()
394         CeedInt shared_mem      = elems_per_block * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyAtPointsCore_Hip_shared()
398 …                                          thread_1d, 1, elems_per_block, shared_mem, interp_args));  in CeedBasisApplyAtPointsCore_Hip_shared()
400 …kend(CeedRunKernelDimShared_Hip(ceed, data->InterpAtPoints, NULL, grid, thread_1d, 1, elems_per_bl…  in CeedBasisApplyAtPointsCore_Hip_shared()
404         const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1);  in CeedBasisApplyAtPointsCore_Hip_shared()
406 …      CeedInt       shared_mem      = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyAtPointsCore_Hip_shared()
410 …                                  thread_1d, thread_1d, elems_per_block, shared_mem, interp_args));  in CeedBasisApplyAtPointsCore_Hip_shared()
412 …eedRunKernelDimShared_Hip(ceed, data->InterpAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_…  in CeedBasisApplyAtPointsCore_Hip_shared()
418 …      CeedInt       shared_mem      = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyAtPointsCore_Hip_shared()
422 …                                  thread_1d, thread_1d, elems_per_block, shared_mem, interp_args));  in CeedBasisApplyAtPointsCore_Hip_shared()
424 …eedRunKernelDimShared_Hip(ceed, data->InterpAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_…  in CeedBasisApplyAtPointsCore_Hip_shared()
435       CeedInt thread_1d   = CeedIntMax(Q_1d, P_1d);  in CeedBasisApplyAtPointsCore_Hip_shared()  local
439         CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64;  in CeedBasisApplyAtPointsCore_Hip_shared()
442         CeedInt shared_mem      = elems_per_block * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyAtPointsCore_Hip_shared()
446 …                                            thread_1d, 1, elems_per_block, shared_mem, grad_args));  in CeedBasisApplyAtPointsCore_Hip_shared()
448 …ackend(CeedRunKernelDimShared_Hip(ceed, data->GradAtPoints, NULL, grid, thread_1d, 1, elems_per_bl…  in CeedBasisApplyAtPointsCore_Hip_shared()
452         const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1);  in CeedBasisApplyAtPointsCore_Hip_shared()
454 …      CeedInt       shared_mem      = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyAtPointsCore_Hip_shared()
458 …                                    thread_1d, thread_1d, elems_per_block, shared_mem, grad_args));  in CeedBasisApplyAtPointsCore_Hip_shared()
460 …(CeedRunKernelDimShared_Hip(ceed, data->GradAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_…  in CeedBasisApplyAtPointsCore_Hip_shared()
466 …      CeedInt       shared_mem      = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar);  in CeedBasisApplyAtPointsCore_Hip_shared()
470 …                                    thread_1d, thread_1d, elems_per_block, shared_mem, grad_args));  in CeedBasisApplyAtPointsCore_Hip_shared()
472 …(CeedRunKernelDimShared_Hip(ceed, data->GradAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_…  in CeedBasisApplyAtPointsCore_Hip_shared()