xref: /petsc/src/mat/impls/aij/seq/seqcusparse/cusparsematimpl.h (revision 9ae82921df069a58776bfe4da82b38e8ff7dd41c)
1*9ae82921SPaul Mullowney #ifndef __CUSPARSEMATIMPL
2*9ae82921SPaul Mullowney #define __CUSPARSEMATIMPL
3*9ae82921SPaul Mullowney 
4*9ae82921SPaul Mullowney #include <../src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h>
5*9ae82921SPaul Mullowney 
6*9ae82921SPaul Mullowney #include <cusparse_v2.h>
7*9ae82921SPaul Mullowney 
8*9ae82921SPaul Mullowney /* New Way */
9*9ae82921SPaul Mullowney #include "tx_sparse_interface.h"
10*9ae82921SPaul Mullowney 
11*9ae82921SPaul Mullowney // This is a hack, but I don't know of another way to pass this variable
12*9ae82921SPaul Mullowney // from one GPU_Matrix_Ifc object to another. It is necessary for the parallel
13*9ae82921SPaul Mullowney // SpMV: the same stream variable has to be used in two different
14*9ae82921SPaul Mullowney // data structures, so a single instance of that stream is created here
15*9ae82921SPaul Mullowney // and reused.
// NOTE(review): this is a definition (not an extern declaration) inside a
// header. Every translation unit that includes this header gets its own
// definition of theBodyStream, which leads to multiple-definition link errors
// if more than one such unit is linked together -- confirm that only one
// source file includes this header, or move the definition into a source file.
16*9ae82921SPaul Mullowney cudaStream_t theBodyStream=0;
17*9ae82921SPaul Mullowney 
18*9ae82921SPaul Mullowney #include <algorithm>
19*9ae82921SPaul Mullowney #include <vector>
20*9ae82921SPaul Mullowney #include <string>
21*9ae82921SPaul Mullowney #include <thrust/sort.h>
22*9ae82921SPaul Mullowney #include <thrust/fill.h>
23*9ae82921SPaul Mullowney 
24*9ae82921SPaul Mullowney // Single instance of the cusparse handle, shared by the whole class.
// It is initialized to 0 here; where the handle is actually created is not
// visible in this header.
// NOTE(review): like any variable defined in a header, this yields one
// definition per including translation unit -- confirm that only one source
// file includes this header.
25*9ae82921SPaul Mullowney cusparseHandle_t MAT_cusparseHandle=0;
26*9ae82921SPaul Mullowney 
/* GPU-side data for a factored matrix: pointers to the lower and upper
   triangular factors, a work vector, and the device storage format.
   NOTE(review): 'format' is a const member and no constructor is declared
   here, so it cannot be assigned after construction -- confirm how the
   code that allocates this struct initializes it. */
27*9ae82921SPaul Mullowney struct Mat_SeqAIJCUSPARSETriFactors {
28*9ae82921SPaul Mullowney   GPU_Matrix_Ifc* loTriFactorPtr; /* lower triangular factor (of the factored matrix) on the GPU */
29*9ae82921SPaul Mullowney   GPU_Matrix_Ifc* upTriFactorPtr; /* upper triangular factor (of the factored matrix) on the GPU */
30*9ae82921SPaul Mullowney   CUSPARRAY* tempvec; /* work vector; its exact use is not visible in this header -- TODO confirm */
31*9ae82921SPaul Mullowney   const GPUStorageFormat  format;  /* the storage format for the matrix on the device (fixed for the object's lifetime) */
32*9ae82921SPaul Mullowney };
33*9ae82921SPaul Mullowney 
/* GPU-side data for a matrix: the device matrix itself, a work vector,
   the nonzero-row count used for flop accounting, and the device storage
   format.
   NOTE(review): 'format' is a const member and no constructor is declared
   here, so it cannot be assigned after construction -- confirm how the
   code that allocates this struct initializes it. */
34*9ae82921SPaul Mullowney struct Mat_SeqAIJCUSPARSE {
35*9ae82921SPaul Mullowney   GPU_Matrix_Ifc*   mat; /* pointer to the matrix on the GPU */
36*9ae82921SPaul Mullowney   CUSPARRAY*        tempvec; /* pointer to a work vector into which the relevant entries of a vector to be multiplied can be copied */
37*9ae82921SPaul Mullowney   PetscInt          nonzerorow; /* number of nonzero rows; used in the flop calculations */
38*9ae82921SPaul Mullowney   const GPUStorageFormat  format;   /* the storage format for the matrix on the device (fixed for the object's lifetime) */
39*9ae82921SPaul Mullowney };
40*9ae82921SPaul Mullowney 
/* Declared here, defined elsewhere. Takes a Mat and returns a PetscErrorCode;
   judging by the name it copies the matrix data to the GPU -- confirm against
   the definition. */
41*9ae82921SPaul Mullowney extern PetscErrorCode MatCUSPARSECopyToGPU(Mat);
/* The declarations below are disabled (commented out) and kept for reference. */
42*9ae82921SPaul Mullowney //extern PetscErrorCode MatGetFactor_seqaij_cusparse(Mat,MatFactorType,Mat*);
43*9ae82921SPaul Mullowney //extern PetscErrorCode MatFactorGetSolverPackage_seqaij_cusparse(Mat,const MatSolverPackage *);
44*9ae82921SPaul Mullowney //extern PetscErrorCode MatCUSPARSECopyFromGPU(Mat, CUSPMATRIX *);
45*9ae82921SPaul Mullowney #endif
46