xref: /petsc/src/mat/impls/aij/mpi/mpicusparse/mpicusparsematimpl.h (revision 219fbbafcab96f1b7f27d5f571f31c0ed2202c25)
1519f805aSKarl Rupp #if !defined(__MPICUSPARSEMATIMPL)
2bbf3fe20SPaul Mullowney #define __MPICUSPARSEMATIMPL
3bbf3fe20SPaul Mullowney 
4b06137fdSPaul Mullowney #include <cusparse_v2.h>
5303a667bSSatish Balay #include <petsc/private/cudavecimpl.h>
6b06137fdSPaul Mullowney 
7*219fbbafSJunchao Zhang struct Mat_MPIAIJCUSPARSE {
8bbf3fe20SPaul Mullowney   /* The following are used by GPU capabilities to store matrix storage formats on the device */
9e057df02SPaul Mullowney   MatCUSPARSEStorageFormat   diagGPUMatFormat;
10e057df02SPaul Mullowney   MatCUSPARSEStorageFormat   offdiagGPUMatFormat;
11b06137fdSPaul Mullowney   cudaStream_t               stream;
12b06137fdSPaul Mullowney   cusparseHandle_t           handle;
13042217e8SBarry Smith   PetscSplitCSRDataStructure deviceMat;
14ddea5d60SJunchao Zhang   PetscInt                   coo_nd,coo_no; /* number of nonzero entries in coo for the diag/offdiag part */
15ddea5d60SJunchao Zhang   THRUSTINTARRAY             *coo_p; /* the permutation array that partitions the coo array into diag/offdiag parts */
16ddea5d60SJunchao Zhang   THRUSTARRAY                *coo_pw; /* the work array that stores the partitioned coo scalar values */
17*219fbbafSJunchao Zhang 
18*219fbbafSJunchao Zhang   /* Extended COO stuff */
19*219fbbafSJunchao Zhang   PetscCount  *Aimap1_d,*Ajmap1_d,*Aperm1_d; /* Local entries to diag */
20*219fbbafSJunchao Zhang   PetscCount  *Bimap1_d,*Bjmap1_d,*Bperm1_d; /* Local entries to offdiag */
21*219fbbafSJunchao Zhang   PetscCount  *Aimap2_d,*Ajmap2_d,*Aperm2_d; /* Remote entries to diag */
22*219fbbafSJunchao Zhang   PetscCount  *Bimap2_d,*Bjmap2_d,*Bperm2_d; /* Remote entries to offdiag */
23*219fbbafSJunchao Zhang   PetscCount  *Cperm1_d; /* Permutation to fill send buffer. 'C' for communication */
24*219fbbafSJunchao Zhang   PetscScalar *sendbuf_d,*recvbuf_d; /* Buffers for remote values in MatSetValuesCOO() */
25*219fbbafSJunchao Zhang   PetscBool   use_extended_coo;
26*219fbbafSJunchao Zhang 
27*219fbbafSJunchao Zhang   Mat_MPIAIJCUSPARSE() {
28*219fbbafSJunchao Zhang     diagGPUMatFormat    = MAT_CUSPARSE_CSR;
29*219fbbafSJunchao Zhang     offdiagGPUMatFormat = MAT_CUSPARSE_CSR;
30*219fbbafSJunchao Zhang     coo_p               = NULL;
31*219fbbafSJunchao Zhang     coo_pw              = NULL;
32*219fbbafSJunchao Zhang     stream              = 0;
33*219fbbafSJunchao Zhang     deviceMat           = NULL;
34*219fbbafSJunchao Zhang     use_extended_coo    = PETSC_FALSE;
35*219fbbafSJunchao Zhang   }
36*219fbbafSJunchao Zhang };
37bbf3fe20SPaul Mullowney 
/*
  Library-internal (PETSC_INTERN) entry points for the cuSPARSE matrix backend.
  Each takes a Mat and returns a PetscErrorCode. Only the prototypes are visible
  in this header, so the notes below are inferred from the names and signatures.
*/
/* NOTE(review): presumably associates the given CUDA stream with the matrix - confirm against the definition */
38b06137fdSPaul Mullowney PETSC_INTERN PetscErrorCode MatCUSPARSESetStream(Mat, const cudaStream_t stream);
/* NOTE(review): presumably makes the matrix use the given cuSPARSE handle - confirm against the definition */
39b06137fdSPaul Mullowney PETSC_INTERN PetscErrorCode MatCUSPARSESetHandle(Mat, const cusparseHandle_t handle);
/* NOTE(review): presumably drops the matrix's reference to its cuSPARSE handle; whether the handle is destroyed is not visible here - confirm */
40b06137fdSPaul Mullowney PETSC_INTERN PetscErrorCode MatCUSPARSEClearHandle(Mat);
41b06137fdSPaul Mullowney 
42bbf3fe20SPaul Mullowney #endif
43