Home
last modified time | relevance | path

Searched refs:grid (Results 1 – 20 of 20) sorted by relevance

/libCEED/backends/cuda-shared/
H A Dceed-cuda-shared-basis.c63 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local
67 …_Cuda(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d, 1, in CeedBasisApplyTensorCore_Cuda_shared()
70 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, 1, elems_pe… in CeedBasisApplyTensorCore_Cuda_shared()
76 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local
80 …red_Cuda(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d, in CeedBasisApplyTensorCore_Cuda_shared()
83 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, … in CeedBasisApplyTensorCore_Cuda_shared()
88 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local
92 …red_Cuda(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d, in CeedBasisApplyTensorCore_Cuda_shared()
95 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, … in CeedBasisApplyTensorCore_Cuda_shared()
117 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local
[all …]
/libCEED/backends/hip-shared/
H A Dceed-hip-shared-basis.c130 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local
134 …d_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d, 1, in CeedBasisApplyTensorCore_Hip_shared()
137 …CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, NULL, grid, thread_1d, 1, elems_per… in CeedBasisApplyTensorCore_Hip_shared()
142 … CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local
146 …ared_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d, in CeedBasisApplyTensorCore_Hip_shared()
149 …CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, e… in CeedBasisApplyTensorCore_Hip_shared()
153 … CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local
157 …ared_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d, in CeedBasisApplyTensorCore_Hip_shared()
160 …CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, e… in CeedBasisApplyTensorCore_Hip_shared()
182 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local
[all …]
/libCEED/backends/cuda-ref/
H A Dceed-cuda-ref-restriction.c150 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Cuda_Core() local
156 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
162 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
168 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
172 … CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
179 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
183 … CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
187 …CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnorientedNoTranspose, grid, block_size, args)… in CeedElemRestrictionApply_Cuda_Core()
195 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Cuda_Core() local
201 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
[all …]
H A Dceed-cuda-ref-basis.c261 const int grid = CeedDivUpInt(num_elem, elems_per_block); in CeedBasisApplyNonTensorCore_Cuda() local
289 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_pe… in CeedBasisApplyNonTensorCore_Cuda()
291 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, … in CeedBasisApplyNonTensorCore_Cuda()
299 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per… in CeedBasisApplyNonTensorCore_Cuda()
301 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, g… in CeedBasisApplyNonTensorCore_Cuda()
309 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per… in CeedBasisApplyNonTensorCore_Cuda()
311 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, d… in CeedBasisApplyNonTensorCore_Cuda()
319 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per… in CeedBasisApplyNonTensorCore_Cuda()
321 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, c… in CeedBasisApplyNonTensorCore_Cuda()
328 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weig… in CeedBasisApplyNonTensorCore_Cuda()
H A Dceed-cuda-ref-operator.c1466 CeedInt grid = CeedDivUpInt(num_elem, elems_per_block); in CeedOperatorAssembleDiagonalCore_Cuda() local
1472 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, diag->LinearPointBlock, grid, num_nodes, 1, elems_per_… in CeedOperatorAssembleDiagonalCore_Cuda()
1474 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, diag->LinearDiagonal, grid, num_nodes, 1, elems_per_bl… in CeedOperatorAssembleDiagonalCore_Cuda()
1784 CeedInt grid = CeedDivUpInt(num_elem_in, asmb->elems_per_block); in CeedOperatorAssembleSingle_Cuda() local
1788 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, asmb->LinearAssemble, NULL, grid, asmb->block_si… in CeedOperatorAssembleSingle_Cuda()
/libCEED/backends/hip-ref/
H A Dceed-hip-ref-restriction.c151 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Hip_Core() local
157 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
163 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
169 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
173 … CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
180 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
184 … CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
188 …CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
196 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Hip_Core() local
202 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
[all …]
H A Dceed-hip-ref-basis.c260 const int grid = CeedDivUpInt(num_elem, elems_per_block); in CeedBasisApplyNonTensorCore_Hip() local
288 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_per… in CeedBasisApplyNonTensorCore_Hip()
290 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, i… in CeedBasisApplyNonTensorCore_Hip()
298 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_… in CeedBasisApplyNonTensorCore_Hip()
300 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, gr… in CeedBasisApplyNonTensorCore_Hip()
308 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_… in CeedBasisApplyNonTensorCore_Hip()
310 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, di… in CeedBasisApplyNonTensorCore_Hip()
318 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_… in CeedBasisApplyNonTensorCore_Hip()
320 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, cu… in CeedBasisApplyNonTensorCore_Hip()
327 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weigh… in CeedBasisApplyNonTensorCore_Hip()
H A Dceed-hip-ref-operator.c1463 CeedInt grid = CeedDivUpInt(num_elem, elems_per_block); in CeedOperatorAssembleDiagonalCore_Hip() local
1469 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, diag->LinearPointBlock, grid, num_nodes, 1, elems_per_b… in CeedOperatorAssembleDiagonalCore_Hip()
1471 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, diag->LinearDiagonal, grid, num_nodes, 1, elems_per_blo… in CeedOperatorAssembleDiagonalCore_Hip()
1781 CeedInt grid = CeedDivUpInt(num_elem_in, asmb->elems_per_block); in CeedOperatorAssembleSingle_Hip() local
1785 …CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, asmb->LinearAssemble, NULL, grid, asmb->block_siz… in CeedOperatorAssembleSingle_Hip()
/libCEED/backends/hip-gen/
H A Dceed-hip-gen-operator.c163 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorApplyAddCore_Hip_gen() local
166 …CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_… in CeedOperatorApplyAddCore_Hip_gen()
169 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorApplyAddCore_Hip_gen() local
172 …CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_… in CeedOperatorApplyAddCore_Hip_gen()
175 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorApplyAddCore_Hip_gen() local
178 …CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_… in CeedOperatorApplyAddCore_Hip_gen()
441 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorLinearAssembleQFunctionCore_Hip_gen() local
444 …CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_qfunction, NULL, grid, block_si… in CeedOperatorLinearAssembleQFunctionCore_Hip_gen()
447 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorLinearAssembleQFunctionCore_Hip_gen() local
450 …CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_qfunction, NULL, grid, block_si… in CeedOperatorLinearAssembleQFunctionCore_Hip_gen()
[all …]
/libCEED/backends/sycl-gen/
H A Dceed-sycl-gen-operator.sycl.cpp126 CeedInt block_sizes[3], grid = 0; in CeedOperatorApplyAdd_Sycl_gen() local
130grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : … in CeedOperatorApplyAdd_Sycl_gen()
133grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : … in CeedOperatorApplyAdd_Sycl_gen()
136grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : … in CeedOperatorApplyAdd_Sycl_gen()
141 sycl::range<3> global_range(grid * block_sizes[2], block_sizes[1], block_sizes[0]); in CeedOperatorApplyAdd_Sycl_gen()
/libCEED/backends/cuda-gen/
H A Dceed-cuda-gen-operator.c76 int *grid) { in BlockGridCalculate() argument
95 *grid = CeedDivUpInt(num_elem, elems_per_block); in BlockGridCalculate()
205 int max_threads_per_block, min_grid_size, grid; in CeedOperatorApplyAddCore_Cuda_gen() local
213 … cuda_data->device_prop.maxThreadsDim[2], cuda_data->device_prop.warpSize, block, &grid)); in CeedOperatorApplyAddCore_Cuda_gen()
217 grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedOperatorApplyAddCore_Cuda_gen()
222 …CeedCallBackend(CeedTryRunKernelDimShared_Cuda(ceed, data->op, stream, grid, block[0], block[1], b… in CeedOperatorApplyAddCore_Cuda_gen()
460 int max_threads_per_block, min_grid_size, grid; in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen() local
468 … cuda_data->device_prop.maxThreadsDim[2], cuda_data->device_prop.warpSize, block, &grid)); in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()
472 grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()
477 …CeedCallBackend(CeedTryRunKernelDimShared_Cuda(ceed, data->assemble_qfunction, NULL, grid, block[0… in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()
[all …]
/libCEED/backends/magma/
H A Dceed-magma-basis.c116 CeedInt grid = CeedDivUpInt(num_elem, num_t_col); in CeedBasisApplyCore_Magma() local
120 …dMagma(ceed, apply_add ? impl->InterpTransposeAdd : impl->InterpTranspose, NULL, grid, num_threads, in CeedBasisApplyCore_Magma()
123 …CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Interp, NULL, grid, num_threads, num_t_col… in CeedBasisApplyCore_Magma()
193 CeedInt grid = CeedDivUpInt(num_elem, num_t_col); in CeedBasisApplyCore_Magma() local
198 …haredMagma(ceed, apply_add ? impl->GradTransposeAdd : impl->GradTranspose, NULL, grid, num_threads, in CeedBasisApplyCore_Magma()
201 …CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Grad, NULL, grid, num_threads, num_t_col, … in CeedBasisApplyCore_Magma()
230 CeedInt grid = CeedDivUpInt(num_elem, num_t_col); in CeedBasisApplyCore_Magma() local
233 …CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Weight, NULL, grid, num_threads, num_t_col… in CeedBasisApplyCore_Magma()
426 CeedInt grid = CeedDivUpInt(N, num_t_col * NB); in CeedBasisApplyNonTensorCore_Magma() local
432 …CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, Kernel, NULL, grid, M, num_t_col, 1, shared_mem,… in CeedBasisApplyNonTensorCore_Magma()
[all …]
/libCEED/examples/petsc/
H A DREADME.md7 This code solves the CEED bakeoff problems on a structured grid generated and referenced using only…
19 This code solves the CEED bakeoff problems on a unstructured grid using DMPlex.
44 This code solves the CEED bakeoff problems on a unstructured grid using DMPlex with p-multigrid imp…
H A Dindex.md97 …ecomposition (see `bpsraw.c`) and using PETSc's `DMPlex` for unstructured grid management (see `bp…
153 … operations, $\bm{B}$, and $\bm{B}^T$, respectively, act on the different grid levels with corresp…
/libCEED/benchmarks/
H A Dpostprocess_plot.py166 grid('on', color='gray', ls='dotted')
167 grid('on', axis='both', which='minor', color='gray', ls='dotted')
/libCEED/examples/deal.II/
H A D.clang-format92 - Regex: "deal.II/grid/.*\\.h"
/libCEED/examples/fluids/
H A Dindex.md160 …is a sort of "mass matrix", and typically well-conditioned independent of grid resolution with a s…
337 The function $\langle \phi \rangle (x,y)$ is represented on a 2-D finite element grid, taken from t…
340 We'll refer to this mesh as the *parent grid*, as for every "parent" point in the parent grid, ther…
341 Define a function space on the parent grid as $\mathcal{V}_p^\mathrm{parent} = \{ \bm v(\bm x) \in …
372 To do this efficiently, **we assume and exploit the full domain grid to be a tensor product in the …
465 For inhomogeneous anisotropic filtering, we use the finite element grid itself to define $\bm{\Delt…
471 The filter width tensor $\bm{\Delta}$, be it defined from grid based sources or just the homogenous…
921 This coordinate modification is done to transform a given grid onto a domain of $x,y,z \in [0, 2\pi…
H A DREADME.md263 …while the simpler HLL converts thermal structures exiting the domain into grid-scale reflecting ac…
643 - Use filter width based on the grid size
/libCEED/doc/sphinx/source/
H A Dreleasenotes.md269 …to facilitate creation of multigrid prolongation, restriction, and coarse grid operators using a c…
362 `examples/navier-stokes`): unstructured grid support (using PETSc's `DMPlex`),
/libCEED/examples/nek/bps/
H A Dbps.usr1497 ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts