1*9ae82921SPaul Mullowney #ifndef __CUSPARSEMATIMPL 2*9ae82921SPaul Mullowney #define __CUSPARSEMATIMPL 3*9ae82921SPaul Mullowney 4*9ae82921SPaul Mullowney #include <../src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h> 5*9ae82921SPaul Mullowney 6*9ae82921SPaul Mullowney #include <cusparse_v2.h> 7*9ae82921SPaul Mullowney 8*9ae82921SPaul Mullowney /* New Way */ 9*9ae82921SPaul Mullowney #include "tx_sparse_interface.h" 10*9ae82921SPaul Mullowney 11*9ae82921SPaul Mullowney // this is such a hack ... but I don't know of another way to pass this variable 12*9ae82921SPaul Mullowney // from one GPU_Matrix_Ifc class to another. This is necessary for the parallel 13*9ae82921SPaul Mullowney // SpMV. Essentially, I need to use the same stream variable in two different 14*9ae82921SPaul Mullowney // data structures. I do this by creating a single instance of that stream 15*9ae82921SPaul Mullowney // and reuse it. 16*9ae82921SPaul Mullowney cudaStream_t theBodyStream=0; 17*9ae82921SPaul Mullowney 18*9ae82921SPaul Mullowney #include <algorithm> 19*9ae82921SPaul Mullowney #include <vector> 20*9ae82921SPaul Mullowney #include <string> 21*9ae82921SPaul Mullowney #include <thrust/sort.h> 22*9ae82921SPaul Mullowney #include <thrust/fill.h> 23*9ae82921SPaul Mullowney 24*9ae82921SPaul Mullowney // Single instance of the cusparse handle for the class. 25*9ae82921SPaul Mullowney cusparseHandle_t MAT_cusparseHandle=0; 26*9ae82921SPaul Mullowney 27*9ae82921SPaul Mullowney struct Mat_SeqAIJCUSPARSETriFactors { 28*9ae82921SPaul Mullowney GPU_Matrix_Ifc* loTriFactorPtr; /* pointer for lower triangular (factored matrix) on GPU */ 29*9ae82921SPaul Mullowney GPU_Matrix_Ifc* upTriFactorPtr; /* pointer for upper triangular (factored matrix) on GPU */ 30*9ae82921SPaul Mullowney CUSPARRAY* tempvec; 31*9ae82921SPaul Mullowney const GPUStorageFormat format; /* the storage format for the matrix on the device */ 32*9ae82921SPaul Mullowney }; 33*9ae82921SPaul Mullowney 34*9ae82921SPaul Mullowney struct Mat_SeqAIJCUSPARSE { 35*9ae82921SPaul Mullowney GPU_Matrix_Ifc* mat; /* pointer to the matrix on the GPU */ 36*9ae82921SPaul Mullowney CUSPARRAY* tempvec; /*pointer to a workvector to which we can copy the relevant indices of a vector we want to multiply */ 37*9ae82921SPaul Mullowney PetscInt nonzerorow; /* number of nonzero rows ... used in the flop calculations */ 38*9ae82921SPaul Mullowney const GPUStorageFormat format; /* the storage format for the matrix on the device */ 39*9ae82921SPaul Mullowney }; 40*9ae82921SPaul Mullowney 41*9ae82921SPaul Mullowney extern PetscErrorCode MatCUSPARSECopyToGPU(Mat); 42*9ae82921SPaul Mullowney //extern PetscErrorCode MatGetFactor_seqaij_cusparse(Mat,MatFactorType,Mat*); 43*9ae82921SPaul Mullowney //extern PetscErrorCode MatFactorGetSolverPackage_seqaij_cusparse(Mat,const MatSolverPackage *); 44*9ae82921SPaul Mullowney //extern PetscErrorCode MatCUSPARSECopyFromGPU(Mat, CUSPMATRIX *); 45*9ae82921SPaul Mullowney #endif 46