Lines Matching refs:data
31 CeedBasis_Cuda_shared *data; in CeedBasisApplyTensorCore_Cuda_shared() local
35 CeedCallBackend(CeedBasisGetData(basis, &data)); in CeedBasisApplyTensorCore_Cuda_shared()
53 …CeedCheck(data->d_interp_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; interp_1d not set", Ceed… in CeedBasisApplyTensorCore_Cuda_shared()
58 void *interp_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_u, &d_v}; in CeedBasisApplyTensorCore_Cuda_shared()
67 …eedCallBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->InterpTransposeAdd : data->Inte… in CeedBasisApplyTensorCore_Cuda_shared()
70 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, 1, elems_pe… in CeedBasisApplyTensorCore_Cuda_shared()
80 …eedCallBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->InterpTransposeAdd : data->Inte… in CeedBasisApplyTensorCore_Cuda_shared()
83 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, … in CeedBasisApplyTensorCore_Cuda_shared()
92 …eedCallBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->InterpTransposeAdd : data->Inte… in CeedBasisApplyTensorCore_Cuda_shared()
95 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, … in CeedBasisApplyTensorCore_Cuda_shared()
103 …CeedCheck(data->d_grad_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; grad_1d not set", CeedEval… in CeedBasisApplyTensorCore_Cuda_shared()
107 CeedScalar *d_grad_1d = data->d_grad_1d; in CeedBasisApplyTensorCore_Cuda_shared()
109 if (data->d_collo_grad_1d) { in CeedBasisApplyTensorCore_Cuda_shared()
110 d_grad_1d = data->d_collo_grad_1d; in CeedBasisApplyTensorCore_Cuda_shared()
112 void *grad_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_grad_1d, &d_u, &d_v}; in CeedBasisApplyTensorCore_Cuda_shared()
121 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->GradTransposeAdd : data->GradT… in CeedBasisApplyTensorCore_Cuda_shared()
124 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Grad, NULL, grid, thread_1d, 1, elems_per_… in CeedBasisApplyTensorCore_Cuda_shared()
134 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->GradTransposeAdd : data->GradT… in CeedBasisApplyTensorCore_Cuda_shared()
137 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Grad, NULL, grid, thread_1d, thread_1d, el… in CeedBasisApplyTensorCore_Cuda_shared()
145 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->GradTransposeAdd : data->GradT… in CeedBasisApplyTensorCore_Cuda_shared()
148 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Grad, NULL, grid, thread_1d, thread_1d, el… in CeedBasisApplyTensorCore_Cuda_shared()
156 …CeedCheck(data->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights_1d not set",… in CeedBasisApplyTensorCore_Cuda_shared()
158 void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; in CeedBasisApplyTensorCore_Cuda_shared()
163 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid_size, Q_1d, elems_per_block, 1, wei… in CeedBasisApplyTensorCore_Cuda_shared()
169 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, … in CeedBasisApplyTensorCore_Cuda_shared()
175 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, … in CeedBasisApplyTensorCore_Cuda_shared()
218 CeedBasis_Cuda_shared *data; in CeedBasisApplyAtPointsCore_Cuda_shared() local
220 CeedCallBackend(CeedBasisGetData(basis, &data)); in CeedBasisApplyAtPointsCore_Cuda_shared()
252 if (num_elem != data->num_elem_at_points) { in CeedBasisApplyAtPointsCore_Cuda_shared()
253 data->num_elem_at_points = num_elem; in CeedBasisApplyAtPointsCore_Cuda_shared()
255 if (data->d_points_per_elem) CeedCallCuda(ceed, cudaFree(data->d_points_per_elem)); in CeedBasisApplyAtPointsCore_Cuda_shared()
256 CeedCallCuda(ceed, cudaMalloc((void **)&data->d_points_per_elem, num_bytes)); in CeedBasisApplyAtPointsCore_Cuda_shared()
257 CeedCallBackend(CeedFree(&data->h_points_per_elem)); in CeedBasisApplyAtPointsCore_Cuda_shared()
258 CeedCallBackend(CeedCalloc(num_elem, &data->h_points_per_elem)); in CeedBasisApplyAtPointsCore_Cuda_shared()
260 if (memcmp(data->h_points_per_elem, num_points, num_bytes)) { in CeedBasisApplyAtPointsCore_Cuda_shared()
261 memcpy(data->h_points_per_elem, num_points, num_bytes); in CeedBasisApplyAtPointsCore_Cuda_shared()
262 …CeedCallCuda(ceed, cudaMemcpy(data->d_points_per_elem, num_points, num_bytes, cudaMemcpyHostToDevi… in CeedBasisApplyAtPointsCore_Cuda_shared()
267 if (data->num_points != max_num_points) { in CeedBasisApplyAtPointsCore_Cuda_shared()
271 data->num_points = max_num_points; in CeedBasisApplyAtPointsCore_Cuda_shared()
274 if (!data->d_chebyshev_interp_1d) { in CeedBasisApplyAtPointsCore_Cuda_shared()
281 CeedCallCuda(ceed, cudaMalloc((void **)&data->d_chebyshev_interp_1d, interp_bytes)); in CeedBasisApplyAtPointsCore_Cuda_shared()
282 …CeedCallCuda(ceed, cudaMemcpy(data->d_chebyshev_interp_1d, chebyshev_interp_1d, interp_bytes, cuda… in CeedBasisApplyAtPointsCore_Cuda_shared()
290 if (data->moduleAtPoints) CeedCallCuda(ceed, cuModuleUnload(data->moduleAtPoints)); in CeedBasisApplyAtPointsCore_Cuda_shared()
292 …CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->moduleAtPoints, 8, "BASIS_Q_1D"… in CeedBasisApplyAtPointsCore_Cuda_shared()
295 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPo… in CeedBasisApplyAtPointsCore_Cuda_shared()
296 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpTransposeAtPoints", &data->I… in CeedBasisApplyAtPointsCore_Cuda_shared()
297 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpTransposeAddAtPoints", &data… in CeedBasisApplyAtPointsCore_Cuda_shared()
298 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints… in CeedBasisApplyAtPointsCore_Cuda_shared()
299 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "GradTransposeAtPoints", &data->Gra… in CeedBasisApplyAtPointsCore_Cuda_shared()
300 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "GradTransposeAddAtPoints", &data->… in CeedBasisApplyAtPointsCore_Cuda_shared()
322 …void *interp_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, … in CeedBasisApplyAtPointsCore_Cuda_shared()
331 …allBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->InterpTransposeAddAtPoints : data->… in CeedBasisApplyAtPointsCore_Cuda_shared()
334 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->InterpAtPoints, NULL, grid, thread_1d, 1, … in CeedBasisApplyAtPointsCore_Cuda_shared()
345 …allBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->InterpTransposeAddAtPoints : data->… in CeedBasisApplyAtPointsCore_Cuda_shared()
348 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->InterpAtPoints, NULL, grid, thread_1d, thr… in CeedBasisApplyAtPointsCore_Cuda_shared()
357 …allBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->InterpTransposeAddAtPoints : data->… in CeedBasisApplyAtPointsCore_Cuda_shared()
360 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->InterpAtPoints, NULL, grid, thread_1d, thr… in CeedBasisApplyAtPointsCore_Cuda_shared()
372 …void *grad_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d… in CeedBasisApplyAtPointsCore_Cuda_shared()
381 …CallBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->GradTransposeAddAtPoints : data->G… in CeedBasisApplyAtPointsCore_Cuda_shared()
384 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->GradAtPoints, NULL, grid, thread_1d, 1, el… in CeedBasisApplyAtPointsCore_Cuda_shared()
394 …CallBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->GradTransposeAddAtPoints : data->G… in CeedBasisApplyAtPointsCore_Cuda_shared()
397 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->GradAtPoints, NULL, grid, thread_1d, threa… in CeedBasisApplyAtPointsCore_Cuda_shared()
406 …CallBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->GradTransposeAddAtPoints : data->G… in CeedBasisApplyAtPointsCore_Cuda_shared()
409 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->GradAtPoints, NULL, grid, thread_1d, threa… in CeedBasisApplyAtPointsCore_Cuda_shared()
455 CeedBasis_Cuda_shared *data; in CeedBasisApplyNonTensorCore_Cuda_shared() local
459 CeedCallBackend(CeedBasisGetData(basis, &data)); in CeedBasisApplyNonTensorCore_Cuda_shared()
476 …CeedCheck(data->d_interp_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; interp not set", CeedEva… in CeedBasisApplyNonTensorCore_Cuda_shared()
481 void *interp_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_u, &d_v}; in CeedBasisApplyNonTensorCore_Cuda_shared()
490 …eedCallBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->InterpTransposeAdd : data->Inte… in CeedBasisApplyNonTensorCore_Cuda_shared()
493 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Interp, NULL, grid, thread, 1, elems_per_b… in CeedBasisApplyNonTensorCore_Cuda_shared()
500 …CeedCheck(data->d_grad_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; grad not set", CeedEvalMod… in CeedBasisApplyNonTensorCore_Cuda_shared()
505 void *grad_args[] = {(void *)&num_elem, &data->d_grad_1d, &d_u, &d_v}; in CeedBasisApplyNonTensorCore_Cuda_shared()
514 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, apply_add ? data->GradTransposeAdd : data->GradT… in CeedBasisApplyNonTensorCore_Cuda_shared()
517 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Grad, NULL, grid, thread, 1, elems_per_blo… in CeedBasisApplyNonTensorCore_Cuda_shared()
524 …CeedCheck(data->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights not set", Ce… in CeedBasisApplyNonTensorCore_Cuda_shared()
529 void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; in CeedBasisApplyNonTensorCore_Cuda_shared()
536 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid, thread, elems_per_block, 1, weight… in CeedBasisApplyNonTensorCore_Cuda_shared()
573 CeedBasis_Cuda_shared *data; in CeedBasisDestroy_Cuda_shared() local
576 CeedCallBackend(CeedBasisGetData(basis, &data)); in CeedBasisDestroy_Cuda_shared()
577 CeedCallCuda(ceed, cuModuleUnload(data->module)); in CeedBasisDestroy_Cuda_shared()
578 if (data->moduleAtPoints) CeedCallCuda(ceed, cuModuleUnload(data->moduleAtPoints)); in CeedBasisDestroy_Cuda_shared()
579 if (data->d_q_weight_1d) CeedCallCuda(ceed, cudaFree(data->d_q_weight_1d)); in CeedBasisDestroy_Cuda_shared()
580 CeedCallBackend(CeedFree(&data->h_points_per_elem)); in CeedBasisDestroy_Cuda_shared()
581 if (data->d_points_per_elem) CeedCallCuda(ceed, cudaFree(data->d_points_per_elem)); in CeedBasisDestroy_Cuda_shared()
582 CeedCallCuda(ceed, cudaFree(data->d_interp_1d)); in CeedBasisDestroy_Cuda_shared()
583 CeedCallCuda(ceed, cudaFree(data->d_grad_1d)); in CeedBasisDestroy_Cuda_shared()
584 CeedCallCuda(ceed, cudaFree(data->d_collo_grad_1d)); in CeedBasisDestroy_Cuda_shared()
585 CeedCallCuda(ceed, cudaFree(data->d_chebyshev_interp_1d)); in CeedBasisDestroy_Cuda_shared()
586 CeedCallBackend(CeedFree(&data)); in CeedBasisDestroy_Cuda_shared()
600 CeedBasis_Cuda_shared *data; in CeedBasisCreateTensorH1_Cuda_shared() local
603 CeedCallBackend(CeedCalloc(1, &data)); in CeedBasisCreateTensorH1_Cuda_shared()
607 CeedCallCuda(ceed, cudaMalloc((void **)&data->d_q_weight_1d, q_bytes)); in CeedBasisCreateTensorH1_Cuda_shared()
608 … CeedCallCuda(ceed, cudaMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, cudaMemcpyHostToDevice)); in CeedBasisCreateTensorH1_Cuda_shared()
610 CeedCallCuda(ceed, cudaMalloc((void **)&data->d_interp_1d, interp_bytes)); in CeedBasisCreateTensorH1_Cuda_shared()
611 …CeedCallCuda(ceed, cudaMemcpy(data->d_interp_1d, interp_1d, interp_bytes, cudaMemcpyHostToDevice)); in CeedBasisCreateTensorH1_Cuda_shared()
612 CeedCallCuda(ceed, cudaMalloc((void **)&data->d_grad_1d, interp_bytes)); in CeedBasisCreateTensorH1_Cuda_shared()
613 CeedCallCuda(ceed, cudaMemcpy(data->d_grad_1d, grad_1d, interp_bytes, cudaMemcpyHostToDevice)); in CeedBasisCreateTensorH1_Cuda_shared()
616 data->d_collo_grad_1d = NULL; in CeedBasisCreateTensorH1_Cuda_shared()
624 CeedCallCuda(ceed, cudaMalloc((void **)&data->d_collo_grad_1d, q_bytes * Q_1d)); in CeedBasisCreateTensorH1_Cuda_shared()
625 …CeedCallCuda(ceed, cudaMemcpy(data->d_collo_grad_1d, collo_grad_1d, q_bytes * Q_1d, cudaMemcpyHost… in CeedBasisCreateTensorH1_Cuda_shared()
634 …CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 8, "BASIS_Q_1D", Q_1d, … in CeedBasisCreateTensorH1_Cuda_shared()
638 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, is_collocated ? "InterpCollocated" : "Inter… in CeedBasisCreateTensorH1_Cuda_shared()
639 …lBackend(CeedGetKernel_Cuda(ceed, data->module, is_collocated ? "InterpCollocatedTranspose" : "Int… in CeedBasisCreateTensorH1_Cuda_shared()
640 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, is_collocated ? "InterpCollocatedTransposeA… in CeedBasisCreateTensorH1_Cuda_shared()
641 &data->InterpTransposeAdd)); in CeedBasisCreateTensorH1_Cuda_shared()
642 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, is_collocated ? "GradCollocated" : "Grad", … in CeedBasisCreateTensorH1_Cuda_shared()
643 …allBackend(CeedGetKernel_Cuda(ceed, data->module, is_collocated ? "GradCollocatedTranspose" : "Gra… in CeedBasisCreateTensorH1_Cuda_shared()
644 …Backend(CeedGetKernel_Cuda(ceed, data->module, is_collocated ? "GradCollocatedTransposeAdd" : "Gra… in CeedBasisCreateTensorH1_Cuda_shared()
645 CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight)); in CeedBasisCreateTensorH1_Cuda_shared()
647 CeedCallBackend(CeedBasisSetData(basis, data)); in CeedBasisCreateTensorH1_Cuda_shared()
667 CeedBasis_Cuda_shared *data; in CeedBasisCreateH1_Cuda_shared() local
684 CeedCallBackend(CeedCalloc(1, &data)); in CeedBasisCreateH1_Cuda_shared()
690 CeedCallCuda(ceed, cudaMalloc((void **)&data->d_q_weight_1d, q_bytes)); in CeedBasisCreateH1_Cuda_shared()
691 CeedCallCuda(ceed, cudaMemcpy(data->d_q_weight_1d, q_weight, q_bytes, cudaMemcpyHostToDevice)); in CeedBasisCreateH1_Cuda_shared()
696 CeedCallCuda(ceed, cudaMalloc((void **)&data->d_interp_1d, interp_bytes)); in CeedBasisCreateH1_Cuda_shared()
697 CeedCallCuda(ceed, cudaMemcpy(data->d_interp_1d, interp, interp_bytes, cudaMemcpyHostToDevice)); in CeedBasisCreateH1_Cuda_shared()
702 CeedCallCuda(ceed, cudaMalloc((void **)&data->d_grad_1d, grad_bytes)); in CeedBasisCreateH1_Cuda_shared()
703 CeedCallCuda(ceed, cudaMemcpy(data->d_grad_1d, grad, grad_bytes, cudaMemcpyHostToDevice)); in CeedBasisCreateH1_Cuda_shared()
710 …CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts,… in CeedBasisCreateH1_Cuda_shared()
712 CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp)); in CeedBasisCreateH1_Cuda_shared()
713 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); in CeedBasisCreateH1_Cuda_shared()
714 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTransposeAdd", &data->InterpTranspos… in CeedBasisCreateH1_Cuda_shared()
715 CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Grad", &data->Grad)); in CeedBasisCreateH1_Cuda_shared()
716 CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "GradTranspose", &data->GradTranspose)); in CeedBasisCreateH1_Cuda_shared()
717 …CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "GradTransposeAdd", &data->GradTransposeAdd… in CeedBasisCreateH1_Cuda_shared()
718 CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight)); in CeedBasisCreateH1_Cuda_shared()
720 CeedCallBackend(CeedBasisSetData(basis, data)); in CeedBasisCreateH1_Cuda_shared()