1519f805aSKarl Rupp #if !defined(__CUSPARSEMATIMPL) 29ae82921SPaul Mullowney #define __CUSPARSEMATIMPL 39ae82921SPaul Mullowney 49ae82921SPaul Mullowney #include <../src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h> 59ae82921SPaul Mullowney 69ae82921SPaul Mullowney #include <cusparse_v2.h> 79ae82921SPaul Mullowney 89ae82921SPaul Mullowney #include <algorithm> 99ae82921SPaul Mullowney #include <vector> 109ae82921SPaul Mullowney #include <thrust/sort.h> 119ae82921SPaul Mullowney #include <thrust/fill.h> 12*aa372e3fSPaul Mullowney #include <cusp/csr_matrix.h> 139ae82921SPaul Mullowney 1457eb53cdSKarl Rupp /* Single instance of the cusparse handle for the class. */ 15e057df02SPaul Mullowney MatCUSPARSEStorageFormat cusparseMatSolveStorageFormat=MAT_CUSPARSE_CSR; 16e057df02SPaul Mullowney 17*aa372e3fSPaul Mullowney #if defined(PETSC_USE_COMPLEX) 18*aa372e3fSPaul Mullowney #if defined(PETSC_USE_REAL_SINGLE) 19*aa372e3fSPaul Mullowney #define cusparse_solve cusparseCcsrsv_solve 20*aa372e3fSPaul Mullowney #define cusparse_analysis cusparseCcsrsv_analysis 21*aa372e3fSPaul Mullowney #define cusparse_csr_spmv cusparseCcsrmv 22*aa372e3fSPaul Mullowney #define cusparse_csr2csc cusparseCcsr2csc 23*aa372e3fSPaul Mullowney #define cusparse_hyb_spmv cusparseChybmv 24*aa372e3fSPaul Mullowney #define cusparse_csr2hyb cusparseCcsr2hyb 25*aa372e3fSPaul Mullowney #define cusparse_hyb2csr cusparseChyb2csr 26*aa372e3fSPaul Mullowney cuFloatComplex ALPHA = {1.0f, 0.0f}; 27*aa372e3fSPaul Mullowney cuFloatComplex BETA = {0.0f, 0.0f}; 28*aa372e3fSPaul Mullowney #elif defined(PETSC_USE_REAL_DOUBLE) 29*aa372e3fSPaul Mullowney #define cusparse_solve cusparseZcsrsv_solve 30*aa372e3fSPaul Mullowney #define cusparse_analysis cusparseZcsrsv_analysis 31*aa372e3fSPaul Mullowney #define cusparse_csr_spmv cusparseZcsrmv 32*aa372e3fSPaul Mullowney #define cusparse_csr2csc cusparseZcsr2csc 33*aa372e3fSPaul Mullowney #define cusparse_hyb_spmv cusparseZhybmv 34*aa372e3fSPaul Mullowney #define cusparse_csr2hyb cusparseZcsr2hyb 35*aa372e3fSPaul Mullowney #define cusparse_hyb2csr cusparseZhyb2csr 36*aa372e3fSPaul Mullowney cuDoubleComplex ALPHA = {1.0, 0.0}; 37*aa372e3fSPaul Mullowney cuDoubleComplex BETA = {0.0, 0.0}; 38*aa372e3fSPaul Mullowney #endif 39*aa372e3fSPaul Mullowney #else 40*aa372e3fSPaul Mullowney PetscScalar ALPHA = 1.0; 41*aa372e3fSPaul Mullowney PetscScalar BETA = 0.0; 42*aa372e3fSPaul Mullowney #if defined(PETSC_USE_REAL_SINGLE) 43*aa372e3fSPaul Mullowney #define cusparse_solve cusparseScsrsv_solve 44*aa372e3fSPaul Mullowney #define cusparse_analysis cusparseScsrsv_analysis 45*aa372e3fSPaul Mullowney #define cusparse_csr_spmv cusparseScsrmv 46*aa372e3fSPaul Mullowney #define cusparse_csr2csc cusparseScsr2csc 47*aa372e3fSPaul Mullowney #define cusparse_hyb_spmv cusparseShybmv 48*aa372e3fSPaul Mullowney #define cusparse_csr2hyb cusparseScsr2hyb 49*aa372e3fSPaul Mullowney #define cusparse_hyb2csr cusparseShyb2csr 50*aa372e3fSPaul Mullowney #elif defined(PETSC_USE_REAL_DOUBLE) 51*aa372e3fSPaul Mullowney #define cusparse_solve cusparseDcsrsv_solve 52*aa372e3fSPaul Mullowney #define cusparse_analysis cusparseDcsrsv_analysis 53*aa372e3fSPaul Mullowney #define cusparse_csr_spmv cusparseDcsrmv 54*aa372e3fSPaul Mullowney #define cusparse_csr2csc cusparseDcsr2csc 55*aa372e3fSPaul Mullowney #define cusparse_hyb_spmv cusparseDhybmv 56*aa372e3fSPaul Mullowney #define cusparse_csr2hyb cusparseDcsr2hyb 57*aa372e3fSPaul Mullowney #define cusparse_hyb2csr cusparseDhyb2csr 58*aa372e3fSPaul Mullowney #endif 59*aa372e3fSPaul Mullowney #endif 60*aa372e3fSPaul Mullowney 61*aa372e3fSPaul Mullowney #define THRUSTINTARRAY32 thrust::device_vector<int> 62*aa372e3fSPaul Mullowney #define THRUSTINTARRAY thrust::device_vector<PetscInt> 63*aa372e3fSPaul Mullowney #define THRUSTARRAY thrust::device_vector<PetscScalar> 64*aa372e3fSPaul Mullowney 65*aa372e3fSPaul Mullowney /* A CSR matrix structure */ 66*aa372e3fSPaul Mullowney struct CsrMatrix { 67*aa372e3fSPaul Mullowney PetscInt num_rows; 68*aa372e3fSPaul Mullowney PetscInt num_cols; 69*aa372e3fSPaul Mullowney PetscInt num_entries; 70*aa372e3fSPaul Mullowney THRUSTINTARRAY32 *row_offsets; 71*aa372e3fSPaul Mullowney THRUSTINTARRAY32 *column_indices; 72*aa372e3fSPaul Mullowney THRUSTARRAY *values; 739ae82921SPaul Mullowney }; 749ae82921SPaul Mullowney 75*aa372e3fSPaul Mullowney //#define CUSPMATRIXCSR32 cusp::csr_matrix<int,PetscScalar,cusp::device_memory> 76*aa372e3fSPaul Mullowney 77*aa372e3fSPaul Mullowney /* This is struct holding the relevant data needed to a MatSolve */ 78*aa372e3fSPaul Mullowney struct Mat_SeqAIJCUSPARSETriFactorStruct { 79*aa372e3fSPaul Mullowney /* Data needed for triangular solve */ 80*aa372e3fSPaul Mullowney cusparseMatDescr_t descr; 81*aa372e3fSPaul Mullowney cusparseSolveAnalysisInfo_t solveInfo; 82*aa372e3fSPaul Mullowney cusparseOperation_t solveOp; 83*aa372e3fSPaul Mullowney CsrMatrix *csrMat; 84*aa372e3fSPaul Mullowney }; 85*aa372e3fSPaul Mullowney 86*aa372e3fSPaul Mullowney /* This is struct holding the relevant data needed to a MatMult */ 87*aa372e3fSPaul Mullowney struct Mat_SeqAIJCUSPARSEMultStruct { 88*aa372e3fSPaul Mullowney void *mat; /* opaque pointer to a matrix. This could be either a cusparseHybMat_t or a CsrMatrix */ 89*aa372e3fSPaul Mullowney cusparseMatDescr_t descr; /* Data needed to describe the matrix for a multiply */ 90*aa372e3fSPaul Mullowney THRUSTINTARRAY *cprowIndices; /* compressed row indices used in the parallel SpMV */ 91*aa372e3fSPaul Mullowney }; 92*aa372e3fSPaul Mullowney 93*aa372e3fSPaul Mullowney /* This is a larger struct holding all the triangular factors for a solve, transpose solve, and 94*aa372e3fSPaul Mullowney any indices used in a reordering */ 95*aa372e3fSPaul Mullowney struct Mat_SeqAIJCUSPARSETriFactors { 96*aa372e3fSPaul Mullowney Mat_SeqAIJCUSPARSETriFactorStruct *loTriFactorPtr; /* pointer for lower triangular (factored matrix) on GPU */ 97*aa372e3fSPaul Mullowney Mat_SeqAIJCUSPARSETriFactorStruct *upTriFactorPtr; /* pointer for upper triangular (factored matrix) on GPU */ 98*aa372e3fSPaul Mullowney Mat_SeqAIJCUSPARSETriFactorStruct *loTriFactorPtrTranspose; /* pointer for lower triangular (factored matrix) on GPU for the transpose (useful for BiCG) */ 99*aa372e3fSPaul Mullowney Mat_SeqAIJCUSPARSETriFactorStruct *upTriFactorPtrTranspose; /* pointer for upper triangular (factored matrix) on GPU for the transpose (useful for BiCG)*/ 100*aa372e3fSPaul Mullowney THRUSTINTARRAY *rpermIndices; /* indices used for any reordering */ 101*aa372e3fSPaul Mullowney THRUSTINTARRAY *cpermIndices; /* indices used for any reordering */ 102*aa372e3fSPaul Mullowney THRUSTARRAY *workVector; 103*aa372e3fSPaul Mullowney MatCUSPARSEStorageFormat format; /* the storage format for the matrix on the device */ 104*aa372e3fSPaul Mullowney cusparseHandle_t handle; /* a handle to the cusparse library */ 105*aa372e3fSPaul Mullowney PetscInt nnz; /* number of nonzeros ... need this for accurate logging between ICC and ILU */ 106*aa372e3fSPaul Mullowney }; 107*aa372e3fSPaul Mullowney 108*aa372e3fSPaul Mullowney /* This is a larger struct holding all the matrices for a SpMV, and SpMV Tranpose */ 1099ae82921SPaul Mullowney struct Mat_SeqAIJCUSPARSE { 110*aa372e3fSPaul Mullowney Mat_SeqAIJCUSPARSEMultStruct *mat; /* pointer to the matrix on the GPU */ 111*aa372e3fSPaul Mullowney Mat_SeqAIJCUSPARSEMultStruct *matTranspose; /* pointer to the matrix on the GPU (for the transpose ... useful for BiCG) */ 112*aa372e3fSPaul Mullowney THRUSTARRAY *workVector; /*pointer to a workvector to which we can copy the relevant indices of a vector we want to multiply */ 1139ae82921SPaul Mullowney PetscInt nonzerorow; /* number of nonzero rows ... used in the flop calculations */ 114e057df02SPaul Mullowney MatCUSPARSEStorageFormat format; /* the storage format for the matrix on the device */ 115*aa372e3fSPaul Mullowney cudaStream_t stream; /* a stream for the parallel SpMV ... this is not owned and should not be deleted */ 116*aa372e3fSPaul Mullowney cusparseHandle_t handle; /* a handle to the cusparse library ... this may not be owned (if we're working in parallel i.e. multiGPUs) */ 1179ae82921SPaul Mullowney }; 1189ae82921SPaul Mullowney 1195a576424SJed Brown PETSC_INTERN PetscErrorCode MatCUSPARSECopyToGPU(Mat); 1209ae82921SPaul Mullowney #endif 121