| /petsc/src/mat/impls/aij/mpi/mpicusparse/ |
| H A D | mpiaijcusparse.cu | 27 PetscCallCUDA(cudaFree(coo->Ajmap1)); in MatCOOStructDestroy_MPIAIJCUSPARSE() 28 PetscCallCUDA(cudaFree(coo->Aperm1)); in MatCOOStructDestroy_MPIAIJCUSPARSE() 29 PetscCallCUDA(cudaFree(coo->Bjmap1)); in MatCOOStructDestroy_MPIAIJCUSPARSE() 30 PetscCallCUDA(cudaFree(coo->Bperm1)); in MatCOOStructDestroy_MPIAIJCUSPARSE() 31 PetscCallCUDA(cudaFree(coo->Aimap2)); in MatCOOStructDestroy_MPIAIJCUSPARSE() 32 PetscCallCUDA(cudaFree(coo->Ajmap2)); in MatCOOStructDestroy_MPIAIJCUSPARSE() 33 PetscCallCUDA(cudaFree(coo->Aperm2)); in MatCOOStructDestroy_MPIAIJCUSPARSE() 34 PetscCallCUDA(cudaFree(coo->Bimap2)); in MatCOOStructDestroy_MPIAIJCUSPARSE() 35 PetscCallCUDA(cudaFree(coo->Bjmap2)); in MatCOOStructDestroy_MPIAIJCUSPARSE() 36 PetscCallCUDA(cudaFree(coo->Bperm2)); in MatCOOStructDestroy_MPIAIJCUSPARSE() [all …]
|
| /petsc/src/ksp/ksp/impls/hpddm/cuda/ |
| H A D | hpddm.cu | 25 PetscCallCUDA(cudaMalloc((void **)&ptr, 2 * N * sizeof(K))); in KSPSolve_HPDDM_CUDA_Private() 30 PetscCallCUDA(cudaMemcpy(host_ptr, ptr, 2 * N * sizeof(K), cudaMemcpyDeviceToHost)); in KSPSolve_HPDDM_CUDA_Private() 32 PetscCallCUDA(cudaMemcpy(ptr + N, host_ptr + N, N * sizeof(K), cudaMemcpyHostToDevice)); in KSPSolve_HPDDM_CUDA_Private() 34 PetscCallCUDA(cudaFree(ptr)); in KSPSolve_HPDDM_CUDA_Private() 42 PetscCallCUDA(cudaMemcpy(host_ptr, b, N * sizeof(PetscScalar), cudaMemcpyDeviceToHost)); in KSPSolve_HPDDM_CUDA_Private() 43 PetscCallCUDA(cudaMemcpy(host_ptr + N, x, N * sizeof(PetscScalar), cudaMemcpyDeviceToHost)); in KSPSolve_HPDDM_CUDA_Private() 45 PetscCallCUDA(cudaMemcpy(x, host_ptr + N, N * sizeof(PetscScalar), cudaMemcpyHostToDevice)); in KSPSolve_HPDDM_CUDA_Private()
|
| /petsc/src/benchmarks/streams/ |
| H A D | CUDAVersion.cu | 362 PetscCallCUDA(cudaSetDeviceFlags(cudaDeviceBlockingSync)); in main() 433 PetscCallCUDA(cudaMalloc((void **)&d_a, sizeof(float) * N)); in runStream() 434 PetscCallCUDA(cudaMalloc((void **)&d_b, sizeof(float) * N)); in runStream() 435 PetscCallCUDA(cudaMalloc((void **)&d_c, sizeof(float) * N)); in runStream() 493 PetscCallCUDA(cudaEventRecord(stop, 0)); in runStream() 559 PetscCallCUDA(cudaMemcpy(h_a, d_a, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream() 560 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream() 573 PetscCallCUDA(cudaMemcpy(h_a, d_a, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream() 574 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream() 587 PetscCallCUDA(cudaMemcpy(h_b, d_b, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream() [all …]
|
| /petsc/src/vec/is/sf/impls/basic/nvshmem/ |
| H A D | sfnvshmem.cu | 122 PetscCallCUDA(cudaMalloc((void **)&sf->rootbufdisp_d, nRemoteRootRanks * sizeof(PetscInt))); in PetscSFSetUp_Basic_NVSHMEM() 123 PetscCallCUDA(cudaMalloc((void **)&sf->rootsigdisp_d, nRemoteRootRanks * sizeof(PetscInt))); in PetscSFSetUp_Basic_NVSHMEM() 124 PetscCallCUDA(cudaMalloc((void **)&sf->ranks_d, nRemoteRootRanks * sizeof(PetscMPIInt))); in PetscSFSetUp_Basic_NVSHMEM() 125 PetscCallCUDA(cudaMalloc((void **)&sf->roffset_d, (nRemoteRootRanks + 1) * sizeof(PetscInt))); in PetscSFSetUp_Basic_NVSHMEM() 127 …PetscCallCUDA(cudaMemcpyAsync(sf->rootbufdisp_d, sf->rootbufdisp, nRemoteRootRanks * sizeof(PetscI… in PetscSFSetUp_Basic_NVSHMEM() 128 …PetscCallCUDA(cudaMemcpyAsync(sf->rootsigdisp_d, sf->rootsigdisp, nRemoteRootRanks * sizeof(PetscI… in PetscSFSetUp_Basic_NVSHMEM() 129 …PetscCallCUDA(cudaMemcpyAsync(sf->ranks_d, sf->ranks + sf->ndranks, nRemoteRootRanks * sizeof(Pets… in PetscSFSetUp_Basic_NVSHMEM() 130 …PetscCallCUDA(cudaMemcpyAsync(sf->roffset_d, sf->roffset + sf->ndranks, (nRemoteRootRanks + 1) * s… in PetscSFSetUp_Basic_NVSHMEM() 145 PetscCallCUDA(cudaMalloc((void **)&bas->leafbufdisp_d, nRemoteLeafRanks * sizeof(PetscInt))); in PetscSFSetUp_Basic_NVSHMEM() 146 PetscCallCUDA(cudaMalloc((void **)&bas->leafsigdisp_d, nRemoteLeafRanks * sizeof(PetscInt))); in PetscSFSetUp_Basic_NVSHMEM() [all …]
|
| /petsc/src/mat/impls/cufft/ |
| H A D | cufft.cu | 43 …PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevi… in MatMult_SeqCUFFT() 47 …PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHo… in MatMult_SeqCUFFT() 80 …PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevi… in MatMultTranspose_SeqCUFFT() 84 …PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHo… in MatMultTranspose_SeqCUFFT() 98 PetscCallCUDA(cudaFree(cufft->devArray)); in MatDestroy_SeqCUFFT() 152 PetscCallCUDA(cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m)); in MatCreateSeqCUFFT()
|
| /petsc/src/mat/impls/aij/seq/seqcusparse/ |
| H A D | aijcusparse.cu | 229 PetscCallCUDA(cudaMalloc(&fs->csrRowPtr, sizeof(*fs->csrRowPtr) * (m + 1))); in MatSeqAIJCUSPARSEBuildFactoredMatrix_LU() 230 PetscCallCUDA(cudaMalloc(&fs->csrColIdx, sizeof(*fs->csrColIdx) * Mnz)); in MatSeqAIJCUSPARSEBuildFactoredMatrix_LU() 231 PetscCallCUDA(cudaMalloc(&fs->csrVal, sizeof(*fs->csrVal) * Mnz)); in MatSeqAIJCUSPARSEBuildFactoredMatrix_LU() 232 …PetscCallCUDA(cudaMemcpy(fs->csrRowPtr, Mi, sizeof(*fs->csrRowPtr) * (m + 1), cudaMemcpyHostToDevi… in MatSeqAIJCUSPARSEBuildFactoredMatrix_LU() 233 …PetscCallCUDA(cudaMemcpy(fs->csrColIdx, Mj, sizeof(*fs->csrColIdx) * Mnz, cudaMemcpyHostToDevice)); in MatSeqAIJCUSPARSEBuildFactoredMatrix_LU() 255 PetscCallCUDA(cudaMalloc((void **)&fs->X, sizeof(*fs->X) * m)); in MatSeqAIJCUSPARSEBuildFactoredMatrix_LU() 256 PetscCallCUDA(cudaMalloc((void **)&fs->Y, sizeof(*fs->Y) * m)); in MatSeqAIJCUSPARSEBuildFactoredMatrix_LU() 266 PetscCallCUDA(cudaMalloc((void **)&fs->spsvBuffer_U, fs->spsvBufferSize_U)); in MatSeqAIJCUSPARSEBuildFactoredMatrix_LU() 267 PetscCallCUDA(cudaMalloc((void **)&fs->spsvBuffer_L, fs->spsvBufferSize_L)); in MatSeqAIJCUSPARSEBuildFactoredMatrix_LU() 285 PetscCallCUDA(cudaMemcpy(fs->csrVal, Ma, sizeof(*Ma) * Mnz, cudaMemcpyHostToDevice)); in MatSeqAIJCUSPARSEBuildFactoredMatrix_LU() [all …]
|
| /petsc/src/sys/objects/device/tests/ |
| H A D | ex2cu.cu | 22 …PetscCallCUDA(cudaStreamSynchronize(NULL)); /* Initialize CUDA runtime to get more accurate timing… in main() 27 else PetscCallCUDA(cudaMalloc((void **)&ptrs[i], (i + 16) * sizeof(PetscScalar))); in main() 42 else PetscCallCUDA(cudaFree(ptrs[i])); in main()
|
| /petsc/include/ |
| H A D | petscdevice_cuda.h | 42 #define PetscCallCUDA(...) \ macro 48 #define PetscCallCUDA(...) \ macro 60 #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__) 65 PetscCallCUDA(cudaGetLastError()); \ 67 PetscCallCUDA(cudaDeviceSynchronize()); \
|
| /petsc/src/sys/memory/cuda/ |
| H A D | mcudahost.cu | 6 PetscCallCUDA(cudaMallocHost(result, a)); in PetscCUDAHostMalloc() 12 PetscCallCUDA(cudaFreeHost(aa)); in PetscCUDAHostFree()
|
| /petsc/src/mat/tutorials/ |
| H A D | ex18cu.cu | 21 PetscCallCUDA(cudaMalloc((void **)&v, 3 * 3 * fe->Ne * sizeof(PetscScalar))); in FillMatrixCUDACOO() 24 PetscCallCUDA(cudaFree(v)); in FillMatrixCUDACOO()
|
| /petsc/src/ksp/pc/impls/vpbjacobi/cuda/ |
| H A D | vpbjacobi_cuda.cu | 36 PetscCallCUDA(cudaMemcpy(bs_d, bs_h, sizeof(PetscInt) * (nblocks + 1), cudaMemcpyHostToDevice)); in UpdateOffsetsOnDevice() 37 … PetscCallCUDA(cudaMemcpy(bs2_d, bs2_h, sizeof(PetscInt) * (nblocks + 1), cudaMemcpyHostToDevice)); in UpdateOffsetsOnDevice() 38 PetscCallCUDA(cudaMemcpy(matIdx_d, matIdx_h, sizeof(PetscInt) * n, cudaMemcpyHostToDevice)); in UpdateOffsetsOnDevice() 39 PetscCallCUDA(cudaMemcpy(diag_d, diag_h, sizeof(MatScalar) * nsize, cudaMemcpyHostToDevice)); in UpdateOffsetsOnDevice() 127 PetscCallCUDA(cudaGetLastError()); in PCApplyOrTranspose_VPBJacobi_CUDA()
|
| /petsc/src/mat/impls/sell/seq/seqcuda/ |
| H A D | sellcuda.cu | 26 if ((*cudastruct)->colidx) PetscCallCUDA(cudaFree((*cudastruct)->colidx)); in MatSeqSELLCUDA_Destroy() 27 if ((*cudastruct)->val) PetscCallCUDA(cudaFree((*cudastruct)->val)); in MatSeqSELLCUDA_Destroy() 28 if ((*cudastruct)->sliidx) PetscCallCUDA(cudaFree((*cudastruct)->sliidx)); in MatSeqSELLCUDA_Destroy() 29 if ((*cudastruct)->chunk_slice_map) PetscCallCUDA(cudaFree((*cudastruct)->chunk_slice_map)); in MatSeqSELLCUDA_Destroy() 45 …PetscCallCUDA(cudaMemcpy(cudastruct->val, a->val, a->sliidx[a->totalslices] * sizeof(MatScalar), c… in MatSeqSELLCUDACopyToGPU() 48 if (cudastruct->colidx) PetscCallCUDA(cudaFree(cudastruct->colidx)); in MatSeqSELLCUDACopyToGPU() 49 if (cudastruct->val) PetscCallCUDA(cudaFree(cudastruct->val)); in MatSeqSELLCUDACopyToGPU() 50 if (cudastruct->sliidx) PetscCallCUDA(cudaFree(cudastruct->sliidx)); in MatSeqSELLCUDACopyToGPU() 51 if (cudastruct->chunk_slice_map) PetscCallCUDA(cudaFree(cudastruct->chunk_slice_map)); in MatSeqSELLCUDACopyToGPU() 56 …PetscCallCUDA(cudaMalloc((void **)&cudastruct->colidx, a->maxallocmat * sizeof(*cudastruct->colidx… in MatSeqSELLCUDACopyToGPU() [all …]
|
| /petsc/src/mat/impls/hypre/cuda/ |
| H A D | hypre1.cu | 15 PetscCallCUDA(cudaGetLastError()); in MatZeroRows_CUDA()
|
| /petsc/src/ksp/pc/impls/pbjacobi/cuda/ |
| H A D | pbjacobi_cuda.cu | 55 PetscCallCUDA(cudaGetLastError()); in PCApplyOrTranspose_PBJacobi_CUDA() 82 PetscCallCUDA(cudaFree(jac->spptr)); in PCDestroy_PBJacobi_CUDA()
|
| /petsc/src/vec/vec/tests/ |
| H A D | benchmark_veccreate.c | 29 PetscCallCUDA(WaitForCUDA()); in main()
|
| H A D | ex15k.kokkos.cxx | 20 #define SyncDevice() PetscCallCUDA(cudaDeviceSynchronize())
|
| /petsc/src/mat/tests/ |
| H A D | ex5k.kokkos.cxx | 35 #define SyncDevice() PetscCallCUDA(cudaDeviceSynchronize())
|
| H A D | ex6k.kokkos.cxx | 37 #define SyncDevice() PetscCallCUDA(cudaDeviceSynchronize())
|
| /petsc/src/ksp/pc/impls/amgx/ |
| H A D | amgx.cxx | 270 …PetscCallCUDA(cudaMemcpy(&amgx->nnz, &rowOffsets[amgx->nLocalRows], sizeof(int), cudaMemcpyDefault… in PCSetUp_AMGX() 617 PetscCallCUDA(cudaGetDevice(&amgx->devID)); in PCCreate_AMGX()
|
| /petsc/src/tao/unconstrained/tutorials/ |
| H A D | rosenbrock4.h | 311 PetscCallCUDA(cudaGetLastError()); \
|
| /petsc/doc/changes/ |
| H A D | 317.md | 71 - Add `PetscCallCUDA()`, `PetscCallCUBLAS()`, `PetscCallCUSPARSE()`, `PetscCallCUSOLVER()`, `PetscC…
|
| /petsc/src/ksp/pc/impls/fieldsplit/ |
| H A D | fieldsplit.c | 1149 …else if (PetscMemTypeCUDA(mtype)) PetscCallCUDA(cudaMalloc((void **)&array, sizeof(PetscScalar) * … in PCSetUpOnBlocks_FieldSplit_Schur() 1486 PetscCallCUDA(cudaFree(array)); in PCMatApply_FieldSplit_Schur() 1487 PetscCallCUDA(cudaMalloc((void **)&array, sizeof(PetscScalar) * m * (P + Q))); in PCMatApply_FieldSplit_Schur()
|