ceed-cuda-compile.cpp - OpenGrok cross reference for /libCEED/backends/cuda/ceed-cuda-compile.cpp

Deleted Added

sdiff | udiff | text | old (d7c59329..) | new (e9c76bdd..)

ceed-cuda-compile.cpp (d7c593293e00f7bc51f26f45435d6890398f4ce7)	ceed-cuda-compile.cpp (e9c76bddc0f2a44f522e0176ed6b7e0c0aa1df73)
1// Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 2// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3// 4// SPDX-License-Identifier: BSD-2-Clause 5// 6// This file is part of CEED: http://github.com/ceed 7 8#include "ceed-cuda-compile.h" --- 192 unchanged lines hidden (view full) --- 201 CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, CeedDivUpInt(points, max_block_size), max_block_size, args)); 202 return CEED_ERROR_SUCCESS; 203} 204 205//------------------------------------------------------------------------------ 206// Run CUDA kernel 207//------------------------------------------------------------------------------ 208int CeedRunKernel_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size, void **args) {	1// Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 2// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3// 4// SPDX-License-Identifier: BSD-2-Clause 5// 6// This file is part of CEED: http://github.com/ceed 7 8#include "ceed-cuda-compile.h" --- 192 unchanged lines hidden (view full) --- 201 CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, CeedDivUpInt(points, max_block_size), max_block_size, args)); 202 return CEED_ERROR_SUCCESS; 203} 204 205//------------------------------------------------------------------------------ 206// Run CUDA kernel 207//------------------------------------------------------------------------------ 208int CeedRunKernel_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size, void **args) {
209 CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, grid_size, block_size, 1, 1, 0, args));	209 CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, NULL, grid_size, block_size, 1, 1, 0, args));
210 return CEED_ERROR_SUCCESS; 211} 212 213//------------------------------------------------------------------------------ 214// Run CUDA kernel for spatial dimension 215//------------------------------------------------------------------------------ 216int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size_x, const int block_size_y, const int block_size_z, 217 void **args) {	210 return CEED_ERROR_SUCCESS; 211} 212 213//------------------------------------------------------------------------------ 214// Run CUDA kernel for spatial dimension 215//------------------------------------------------------------------------------ 216int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size_x, const int block_size_y, const int block_size_z, 217 void **args) {
218 CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, grid_size, block_size_x, block_size_y, block_size_z, 0, args));	218 CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, NULL, grid_size, block_size_x, block_size_y, block_size_z, 0, args));
219 return CEED_ERROR_SUCCESS; 220} 221 222//------------------------------------------------------------------------------ 223// Run CUDA kernel for spatial dimension with shared memory 224//------------------------------------------------------------------------------	219 return CEED_ERROR_SUCCESS; 220} 221 222//------------------------------------------------------------------------------ 223// Run CUDA kernel for spatial dimension with shared memory 224//------------------------------------------------------------------------------
225static int CeedRunKernelDimSharedCore_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size_x, const int block_size_y, 226 const int block_size_z, const int shared_mem_size, const bool throw_error, bool is_good_run, 227 void *args) {	225static int CeedRunKernelDimSharedCore_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size, const int block_size_x, 226 const int block_size_y, const int block_size_z, const int shared_mem_size, const bool throw_error, 227 bool is_good_run, void *args) {
228#if CUDA_VERSION >= 9000 229 cuFuncSetAttribute(kernel, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, shared_mem_size); 230#endif	228#if CUDA_VERSION >= 9000 229 cuFuncSetAttribute(kernel, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, shared_mem_size); 230#endif
231 CUresult result = cuLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block_size_z, shared_mem_size, NULL, args, NULL);	231 CUresult result = cuLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block_size_z, shared_mem_size, stream, args, NULL);
232 233 if (result == CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES) { 234 *is_good_run = false; 235 if (throw_error) { 236 int max_threads_per_block, shared_size_bytes, num_regs; 237 238 cuFuncGetAttribute(&max_threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, kernel); 239 cuFuncGetAttribute(&shared_size_bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, kernel); 240 cuFuncGetAttribute(&num_regs, CU_FUNC_ATTRIBUTE_NUM_REGS, kernel); 241 return CeedError(ceed, CEED_ERROR_BACKEND, 242 "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: max_threads_per_block %d on block size (%d,%d,%d), shared_size %d, num_regs %d", 243 max_threads_per_block, block_size_x, block_size_y, block_size_z, shared_size_bytes, num_regs); 244 } 245 } else CeedChk_Cu(ceed, result); 246 return CEED_ERROR_SUCCESS; 247} 248	232 233 if (result == CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES) { 234 *is_good_run = false; 235 if (throw_error) { 236 int max_threads_per_block, shared_size_bytes, num_regs; 237 238 cuFuncGetAttribute(&max_threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, kernel); 239 cuFuncGetAttribute(&shared_size_bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, kernel); 240 cuFuncGetAttribute(&num_regs, CU_FUNC_ATTRIBUTE_NUM_REGS, kernel); 241 return CeedError(ceed, CEED_ERROR_BACKEND, 242 "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: max_threads_per_block %d on block size (%d,%d,%d), shared_size %d, num_regs %d", 243 max_threads_per_block, block_size_x, block_size_y, block_size_z, shared_size_bytes, num_regs); 244 } 245 } else CeedChk_Cu(ceed, result); 246 return CEED_ERROR_SUCCESS; 247} 248
249int CeedRunKernelDimShared_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size_x, const int block_size_y,	249int CeedRunKernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size, const int block_size_x, const int block_size_y,
250 const int block_size_z, const int shared_mem_size, void **args) { 251 bool is_good_run = true; 252	250 const int block_size_z, const int shared_mem_size, void **args) { 251 bool is_good_run = true; 252
253 CeedCallBackend( 254 CeedRunKernelDimSharedCore_Cuda(ceed, kernel, grid_size, block_size_x, block_size_y, block_size_z, shared_mem_size, true, &is_good_run, args));	253 CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, block_size_y, block_size_z, shared_mem_size, true, 254 &is_good_run, args));
255 return CEED_ERROR_SUCCESS; 256} 257	255 return CEED_ERROR_SUCCESS; 256} 257
258int CeedTryRunKernelDimShared_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size_x, const int block_size_y,	258int CeedTryRunKernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size, const int block_size_x, const int block_size_y,
259 const int block_size_z, const int shared_mem_size, bool is_good_run, void *args) {	259 const int block_size_z, const int shared_mem_size, bool is_good_run, void *args) {
260 CeedCallBackend( 261 CeedRunKernelDimSharedCore_Cuda(ceed, kernel, grid_size, block_size_x, block_size_y, block_size_z, shared_mem_size, false, is_good_run, args));	260 CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, block_size_y, block_size_z, shared_mem_size, false, 261 is_good_run, args));
262 return CEED_ERROR_SUCCESS; 263} 264 265//------------------------------------------------------------------------------	262 return CEED_ERROR_SUCCESS; 263} 264 265//------------------------------------------------------------------------------