1519f805aSKarl Rupp #if !defined(__MPICUSPARSEMATIMPL) 2bbf3fe20SPaul Mullowney #define __MPICUSPARSEMATIMPL 3bbf3fe20SPaul Mullowney 4b06137fdSPaul Mullowney #include <cusparse_v2.h> 5303a667bSSatish Balay #include <petsc/private/cudavecimpl.h> 6b06137fdSPaul Mullowney 7*219fbbafSJunchao Zhang struct Mat_MPIAIJCUSPARSE { 8bbf3fe20SPaul Mullowney /* The following are used by GPU capabilities to store matrix storage formats on the device */ 9e057df02SPaul Mullowney MatCUSPARSEStorageFormat diagGPUMatFormat; 10e057df02SPaul Mullowney MatCUSPARSEStorageFormat offdiagGPUMatFormat; 11b06137fdSPaul Mullowney cudaStream_t stream; 12b06137fdSPaul Mullowney cusparseHandle_t handle; 13042217e8SBarry Smith PetscSplitCSRDataStructure deviceMat; 14ddea5d60SJunchao Zhang PetscInt coo_nd,coo_no; /* number of nonzero entries in coo for the diag/offdiag part */ 15ddea5d60SJunchao Zhang THRUSTINTARRAY *coo_p; /* the permutation array that partitions the coo array into diag/offdiag parts */ 16ddea5d60SJunchao Zhang THRUSTARRAY *coo_pw; /* the work array that stores the partitioned coo scalar values */ 17*219fbbafSJunchao Zhang 18*219fbbafSJunchao Zhang /* Extended COO stuff */ 19*219fbbafSJunchao Zhang PetscCount *Aimap1_d,*Ajmap1_d,*Aperm1_d; /* Local entries to diag */ 20*219fbbafSJunchao Zhang PetscCount *Bimap1_d,*Bjmap1_d,*Bperm1_d; /* Local entries to offdiag */ 21*219fbbafSJunchao Zhang PetscCount *Aimap2_d,*Ajmap2_d,*Aperm2_d; /* Remote entries to diag */ 22*219fbbafSJunchao Zhang PetscCount *Bimap2_d,*Bjmap2_d,*Bperm2_d; /* Remote entries to offdiag */ 23*219fbbafSJunchao Zhang PetscCount *Cperm1_d; /* Permutation to fill send buffer. 'C' for communication */ 24*219fbbafSJunchao Zhang PetscScalar *sendbuf_d,*recvbuf_d; /* Buffers for remote values in MatSetValuesCOO() */ 25*219fbbafSJunchao Zhang PetscBool use_extended_coo; 26*219fbbafSJunchao Zhang 27*219fbbafSJunchao Zhang Mat_MPIAIJCUSPARSE() { 28*219fbbafSJunchao Zhang diagGPUMatFormat = MAT_CUSPARSE_CSR; 29*219fbbafSJunchao Zhang offdiagGPUMatFormat = MAT_CUSPARSE_CSR; 30*219fbbafSJunchao Zhang coo_p = NULL; 31*219fbbafSJunchao Zhang coo_pw = NULL; 32*219fbbafSJunchao Zhang stream = 0; 33*219fbbafSJunchao Zhang deviceMat = NULL; 34*219fbbafSJunchao Zhang use_extended_coo = PETSC_FALSE; 35*219fbbafSJunchao Zhang } 36*219fbbafSJunchao Zhang }; 37bbf3fe20SPaul Mullowney 38b06137fdSPaul Mullowney PETSC_INTERN PetscErrorCode MatCUSPARSESetStream(Mat, const cudaStream_t stream); 39b06137fdSPaul Mullowney PETSC_INTERN PetscErrorCode MatCUSPARSESetHandle(Mat, const cusparseHandle_t handle); 40b06137fdSPaul Mullowney PETSC_INTERN PetscErrorCode MatCUSPARSEClearHandle(Mat); 41b06137fdSPaul Mullowney 42bbf3fe20SPaul Mullowney #endif 43