1 /* 2 Defines the basic matrix operations for the AIJ (compressed row) 3 matrix storage format. 4 */ 5 6 #include "petscconf.h" 7 PETSC_CUDA_EXTERN_C_BEGIN 8 #include "../src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 9 //#include "petscbt.h" 10 #include "../src/vec/vec/impls/dvecimpl.h" 11 #include "petsc-private/vecimpl.h" 12 PETSC_CUDA_EXTERN_C_END 13 #undef VecType 14 #include "cusparsematimpl.h" 15 const char * const MatCUSPARSEStorageFormats[] = {"CSR","ELL","HYB","MatCUSPARSEStorageFormat","MAT_CUSPARSE_",0}; 16 17 /* this is such a hack ... but I don't know of another way to pass this variable 18 from one GPU_Matrix_Ifc class to another. This is necessary for the parallel 19 SpMV. Essentially, I need to use the same stream variable in two different 20 data structures. I do this by creating a single instance of that stream 21 and reuse it. */ 22 cudaStream_t theBodyStream=0; 23 24 PetscErrorCode MatILUFactorSymbolic_SeqAIJCUSPARSE(Mat,Mat,IS,IS,const MatFactorInfo*); 25 PetscErrorCode MatLUFactorSymbolic_SeqAIJCUSPARSE(Mat,Mat,IS,IS,const MatFactorInfo*); 26 PetscErrorCode MatLUFactorNumeric_SeqAIJCUSPARSE(Mat,Mat,const MatFactorInfo *); 27 PetscErrorCode MatSolve_SeqAIJCUSPARSE(Mat,Vec,Vec); 28 PetscErrorCode MatSolve_SeqAIJCUSPARSE_NaturalOrdering(Mat,Vec,Vec); 29 PetscErrorCode MatSetFromOptions_SeqAIJCUSPARSE(Mat); 30 PetscErrorCode MatSeqAIJCUSPARSEAnalysisAndCopyToGPU(Mat); 31 PetscErrorCode MatMult_SeqAIJCUSPARSE(Mat,Vec,Vec); 32 PetscErrorCode MatMultAdd_SeqAIJCUSPARSE(Mat,Vec,Vec,Vec); 33 PetscErrorCode MatMultTranspose_SeqAIJCUSPARSE(Mat,Vec,Vec); 34 PetscErrorCode MatMultTransposeAdd_SeqAIJCUSPARSE(Mat,Vec,Vec,Vec); 35 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_cusparse" 38 PetscErrorCode MatFactorGetSolverPackage_seqaij_cusparse(Mat A,const MatSolverPackage *type) 39 { 40 PetscFunctionBegin; 41 *type = MATSOLVERCUSPARSE; 42 PetscFunctionReturn(0); 43 } 44 45 EXTERN_C_BEGIN 46 extern PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 47 EXTERN_C_END 48 /* 49 MATSOLVERCUSPARSE = "cusparse" - A matrix type providing triangular solvers (ILU) for seq matrices 50 on the GPU of type, seqaijcusparse, aijcusparse, or seqaijcusp, aijcusp 51 52 Level: beginner 53 */ 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatGetFactor_seqaij_cusparse" 58 PetscErrorCode MatGetFactor_seqaij_cusparse(Mat A,MatFactorType ftype,Mat *B) 59 { 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 ierr = MatGetFactor_seqaij_petsc(A,ftype,B);CHKERRQ(ierr); 64 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 65 ierr = MatSetType(*B,MATSEQAIJCUSPARSE);CHKERRQ(ierr); 66 ierr = MatSetFromOptions_SeqAIJCUSPARSE(*B);CHKERRQ(ierr); 67 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*B),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_cusparse",MatFactorGetSolverPackage_seqaij_cusparse);CHKERRQ(ierr); 68 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJCUSPARSE; 69 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJCUSPARSE; 70 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported for CUSPARSE Matrix Types"); 71 (*B)->factortype = ftype; 72 PetscFunctionReturn(0); 73 } 74 EXTERN_C_END 75 76 EXTERN_C_BEGIN 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatCUSPARSESetFormat_SeqAIJCUSPARSE" 79 PetscErrorCode MatCUSPARSESetFormat_SeqAIJCUSPARSE(Mat A,MatCUSPARSEFormatOperation op,MatCUSPARSEStorageFormat format) 80 { 81 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; 82 PetscFunctionBegin; 83 switch (op) { 84 case MAT_CUSPARSE_MULT: 85 cusparseMat->format = format; 86 break; 87 case MAT_CUSPARSE_SOLVE: 88 cusparseMatSolveStorageFormat = format; 89 break; 90 case MAT_CUSPARSE_ALL: 91 cusparseMat->format = format; 92 cusparseMatSolveStorageFormat = format; 93 break; 94 default: 95 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unsupported operation %d for MatCUSPARSEFormatOperation. MAT_CUSPARSE_MULT, MAT_CUSPARSE_SOLVE, and MAT_CUSPARSE_ALL are currently supported.",op); 96 } 97 PetscFunctionReturn(0); 98 } 99 EXTERN_C_END 100 101 102 /*@ 103 MatCUSPARSESetFormat - Sets the storage format of CUSPARSE matrices for a particular 104 operation. Only the MatMult operation can use different GPU storage formats 105 for AIJCUSPARSE matrices. This requires the txpetscgpu package. Use --download-txpetscgpu 106 to build/install PETSc to use this package. 107 108 Not Collective 109 110 Input Parameters: 111 + A : Matrix of type SEQAIJCUSPARSE 112 . op : MatCUSPARSEFormatOperation. SEQAIJCUSPARSE matrices support MAT_CUSPARSE_MULT, MAT_CUSPARSE_SOLVE, and MAT_CUSPARSE_ALL. MPIAIJCUSPARSE matrices support MAT_CUSPARSE_MULT_DIAG, MAT_CUSPARSE_MULT_OFFDIAG, and MAT_CUSPARSE_ALL. 113 - format : MatCUSPARSEStorageFormat (one of MAT_CUSPARSE_CSR, MAT_CUSPARSE_ELL, MAT_CUSPARSE_HYB) 114 115 Output Parameter: 116 117 Level: intermediate 118 119 .seealso: MatCUSPARSEStorageFormat, MatCUSPARSEARSEFormatOperation 120 @*/ 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatCUSPARSESetFormat" 123 PetscErrorCode MatCUSPARSESetFormat(Mat A,MatCUSPARSEFormatOperation op,MatCUSPARSEStorageFormat format) 124 { 125 PetscErrorCode ierr; 126 PetscFunctionBegin; 127 PetscValidHeaderSpecific(A, MAT_CLASSID,1); 128 ierr = PetscTryMethod(A, "MatCUSPARSESetFormat_C",(Mat,MatCUSPARSEFormatOperation,MatCUSPARSEStorageFormat),(A,op,format));CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetFromOptions_SeqAIJCUSPARSE" 134 PetscErrorCode MatSetFromOptions_SeqAIJCUSPARSE(Mat A) 135 { 136 PetscErrorCode ierr; 137 MatCUSPARSEStorageFormat format; 138 PetscBool flg; 139 PetscFunctionBegin; 140 ierr = PetscOptionsHead("SeqAIJCUSPARSE options");CHKERRQ(ierr); 141 ierr = PetscObjectOptionsBegin((PetscObject)A); 142 if (A->factortype==MAT_FACTOR_NONE) { 143 ierr = PetscOptionsEnum("-mat_cusparse_mult_storage_format","sets storage format of (seq)aijcusparse gpu matrices for SpMV", 144 "MatCUSPARSESetFormat",MatCUSPARSEStorageFormats,(PetscEnum)format,(PetscEnum*)&format,&flg);CHKERRQ(ierr); 145 if (flg) { 146 ierr = MatCUSPARSESetFormat(A,MAT_CUSPARSE_MULT,format);CHKERRQ(ierr); 147 } 148 } 149 else { 150 ierr = PetscOptionsEnum("-mat_cusparse_solve_storage_format","sets storage format of (seq)aijcusparse gpu matrices for TriSolve", 151 "MatCUSPARSESetFormat",MatCUSPARSEStorageFormats,(PetscEnum)format,(PetscEnum*)&format,&flg);CHKERRQ(ierr); 152 if (flg) { 153 ierr = MatCUSPARSESetFormat(A,MAT_CUSPARSE_SOLVE,format);CHKERRQ(ierr); 154 } 155 } 156 ierr = PetscOptionsEnd();CHKERRQ(ierr); 157 PetscFunctionReturn(0); 158 159 } 160 161 #undef __FUNCT__ 162 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJCUSPARSE" 163 PetscErrorCode MatILUFactorSymbolic_SeqAIJCUSPARSE(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 164 { 165 PetscErrorCode ierr; 166 167 PetscFunctionBegin; 168 ierr = MatILUFactorSymbolic_SeqAIJ(B,A,isrow,iscol,info);CHKERRQ(ierr); 169 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJCUSPARSE; 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJCUSPARSE" 175 PetscErrorCode MatLUFactorSymbolic_SeqAIJCUSPARSE(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 176 { 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 ierr = MatLUFactorSymbolic_SeqAIJ(B,A,isrow,iscol,info);CHKERRQ(ierr); 181 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJCUSPARSE; 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSeqAIJCUSPARSEBuildLowerTriMatrix" 187 PetscErrorCode MatSeqAIJCUSPARSEBuildLowerTriMatrix(Mat A) 188 { 189 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 190 PetscInt n = A->rmap->n; 191 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 192 GPU_Matrix_Ifc* cusparseMat = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; 193 cusparseStatus_t stat; 194 const PetscInt *ai = a->i,*aj = a->j,*vi; 195 const MatScalar *aa = a->a,*v; 196 PetscErrorCode ierr; 197 PetscInt *AiLo, *AjLo; 198 PetscScalar *AALo; 199 PetscInt i,nz, nzLower, offset, rowOffset; 200 201 PetscFunctionBegin; 202 if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU){ 203 try { 204 /* first figure out the number of nonzeros in the lower triangular matrix including 1's on the diagonal. */ 205 nzLower=n+ai[n]-ai[1]; 206 207 /* Allocate Space for the lower triangular matrix */ 208 ierr = cudaMallocHost((void **) &AiLo, (n+1)*sizeof(PetscInt));CHKERRCUSP(ierr); 209 ierr = cudaMallocHost((void **) &AjLo, nzLower*sizeof(PetscInt));CHKERRCUSP(ierr); 210 ierr = cudaMallocHost((void **) &AALo, nzLower*sizeof(PetscScalar));CHKERRCUSP(ierr); 211 212 /* Fill the lower triangular matrix */ 213 AiLo[0]=(PetscInt) 0; 214 AiLo[n]=nzLower; 215 AjLo[0]=(PetscInt) 0; 216 AALo[0]=(MatScalar) 1.0; 217 v = aa; 218 vi = aj; 219 offset=1; 220 rowOffset=1; 221 for (i=1; i<n; i++) { 222 nz = ai[i+1] - ai[i]; 223 /* additional 1 for the term on the diagonal */ 224 AiLo[i]=rowOffset; 225 rowOffset+=nz+1; 226 227 ierr = PetscMemcpy(&(AjLo[offset]), vi, nz*sizeof(PetscInt));CHKERRQ(ierr); 228 ierr = PetscMemcpy(&(AALo[offset]), v, nz*sizeof(PetscScalar));CHKERRQ(ierr); 229 230 offset+=nz; 231 AjLo[offset]=(PetscInt) i; 232 AALo[offset]=(MatScalar) 1.0; 233 offset+=1; 234 235 v += nz; 236 vi += nz; 237 } 238 cusparseMat = GPU_Matrix_Factory::getNew(MatCUSPARSEStorageFormats[cusparseTriFactors->format]); 239 stat = cusparseMat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_LOWER);CHKERRCUSP(stat); 240 ierr = cusparseMat->setMatrix(n, n, nzLower, AiLo, AjLo, AALo);CHKERRCUSP(ierr); 241 stat = cusparseMat->solveAnalysis();CHKERRCUSP(stat); 242 ((Mat_SeqAIJCUSPARSETriFactors*)A->spptr)->loTriFactorPtr = cusparseMat; 243 ierr = cudaFreeHost(AiLo);CHKERRCUSP(ierr); 244 ierr = cudaFreeHost(AjLo);CHKERRCUSP(ierr); 245 ierr = cudaFreeHost(AALo);CHKERRCUSP(ierr); 246 } catch(char* ex) { 247 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSeqAIJCUSPARSEBuildUpperTriMatrix" 255 PetscErrorCode MatSeqAIJCUSPARSEBuildUpperTriMatrix(Mat A) 256 { 257 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 258 PetscInt n = A->rmap->n; 259 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 260 GPU_Matrix_Ifc* cusparseMat = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; 261 cusparseStatus_t stat; 262 const PetscInt *aj = a->j,*adiag = a->diag,*vi; 263 const MatScalar *aa = a->a,*v; 264 PetscInt *AiUp, *AjUp; 265 PetscScalar *AAUp; 266 PetscInt i,nz, nzUpper, offset; 267 PetscErrorCode ierr; 268 269 PetscFunctionBegin; 270 271 if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU){ 272 try { 273 /* next, figure out the number of nonzeros in the upper triangular matrix. */ 274 nzUpper = adiag[0]-adiag[n]; 275 276 /* Allocate Space for the upper triangular matrix */ 277 ierr = cudaMallocHost((void **) &AiUp, (n+1)*sizeof(PetscInt));CHKERRCUSP(ierr); 278 ierr = cudaMallocHost((void **) &AjUp, nzUpper*sizeof(PetscInt));CHKERRCUSP(ierr); 279 ierr = cudaMallocHost((void **) &AAUp, nzUpper*sizeof(PetscScalar));CHKERRCUSP(ierr); 280 281 /* Fill the upper triangular matrix */ 282 AiUp[0]=(PetscInt) 0; 283 AiUp[n]=nzUpper; 284 offset = nzUpper; 285 for (i=n-1; i>=0; i--){ 286 v = aa + adiag[i+1] + 1; 287 vi = aj + adiag[i+1] + 1; 288 289 /* number of elements NOT on the diagonal */ 290 nz = adiag[i] - adiag[i+1]-1; 291 292 /* decrement the offset */ 293 offset -= (nz+1); 294 295 /* first, set the diagonal elements */ 296 AjUp[offset] = (PetscInt) i; 297 AAUp[offset] = 1./v[nz]; 298 AiUp[i] = AiUp[i+1] - (nz+1); 299 300 ierr = PetscMemcpy(&(AjUp[offset+1]), vi, nz*sizeof(PetscInt));CHKERRQ(ierr); 301 ierr = PetscMemcpy(&(AAUp[offset+1]), v, nz*sizeof(PetscScalar));CHKERRQ(ierr); 302 } 303 cusparseMat = GPU_Matrix_Factory::getNew(MatCUSPARSEStorageFormats[cusparseTriFactors->format]); 304 stat = cusparseMat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); 305 ierr = cusparseMat->setMatrix(n, n, nzUpper, AiUp, AjUp, AAUp);CHKERRCUSP(ierr); 306 stat = cusparseMat->solveAnalysis();CHKERRCUSP(stat); 307 ((Mat_SeqAIJCUSPARSETriFactors*)A->spptr)->upTriFactorPtr = cusparseMat; 308 ierr = cudaFreeHost(AiUp);CHKERRCUSP(ierr); 309 ierr = cudaFreeHost(AjUp);CHKERRCUSP(ierr); 310 ierr = cudaFreeHost(AAUp);CHKERRCUSP(ierr); 311 } catch(char* ex) { 312 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 313 } 314 } 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatSeqAIJCUSPARSEAnalysiAndCopyToGPU" 320 PetscErrorCode MatSeqAIJCUSPARSEAnalysisAndCopyToGPU(Mat A) 321 { 322 PetscErrorCode ierr; 323 Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data; 324 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 325 IS isrow = a->row,iscol = a->icol; 326 PetscBool row_identity,col_identity; 327 const PetscInt *r,*c; 328 PetscInt n = A->rmap->n; 329 330 PetscFunctionBegin; 331 ierr = MatSeqAIJCUSPARSEBuildLowerTriMatrix(A);CHKERRQ(ierr); 332 ierr = MatSeqAIJCUSPARSEBuildUpperTriMatrix(A);CHKERRQ(ierr); 333 cusparseTriFactors->tempvec = new CUSPARRAY; 334 cusparseTriFactors->tempvec->resize(n); 335 336 A->valid_GPU_matrix = PETSC_CUSP_BOTH; 337 /*lower triangular indices */ 338 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 339 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 340 if (!row_identity) 341 ierr = cusparseTriFactors->loTriFactorPtr->setOrdIndices(r, n);CHKERRCUSP(ierr); 342 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 343 344 /*upper triangular indices */ 345 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 346 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 347 if (!col_identity) 348 ierr = cusparseTriFactors->upTriFactorPtr->setOrdIndices(c, n);CHKERRCUSP(ierr); 349 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 353 #undef __FUNCT__ 354 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJCUSPARSE" 355 PetscErrorCode MatLUFactorNumeric_SeqAIJCUSPARSE(Mat B,Mat A,const MatFactorInfo *info) 356 { 357 PetscErrorCode ierr; 358 Mat_SeqAIJ *b=(Mat_SeqAIJ *)B->data; 359 IS isrow = b->row,iscol = b->col; 360 PetscBool row_identity,col_identity; 361 362 PetscFunctionBegin; 363 ierr = MatLUFactorNumeric_SeqAIJ(B,A,info);CHKERRQ(ierr); 364 /* determine which version of MatSolve needs to be used. */ 365 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 366 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 367 if (row_identity && col_identity) B->ops->solve = MatSolve_SeqAIJCUSPARSE_NaturalOrdering; 368 else B->ops->solve = MatSolve_SeqAIJCUSPARSE; 369 370 /* get the triangular factors */ 371 ierr = MatSeqAIJCUSPARSEAnalysisAndCopyToGPU(B);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 376 377 #undef __FUNCT__ 378 #define __FUNCT__ "MatSolve_SeqAIJCUSPARSE" 379 PetscErrorCode MatSolve_SeqAIJCUSPARSE(Mat A,Vec bb,Vec xx) 380 { 381 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 382 PetscErrorCode ierr; 383 CUSPARRAY *xGPU, *bGPU; 384 cusparseStatus_t stat; 385 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 386 GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; 387 GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; 388 CUSPARRAY * tempGPU = (CUSPARRAY*) cusparseTriFactors->tempvec; 389 390 PetscFunctionBegin; 391 /* Get the GPU pointers */ 392 ierr = VecCUSPGetArrayWrite(xx,&xGPU);CHKERRQ(ierr); 393 ierr = VecCUSPGetArrayRead(bb,&bGPU);CHKERRQ(ierr); 394 395 /* solve with reordering */ 396 ierr = cusparseMatLo->reorderIn(xGPU, bGPU);CHKERRCUSP(ierr); 397 stat = cusparseMatLo->solve(xGPU, tempGPU);CHKERRCUSP(stat); 398 stat = cusparseMatUp->solve(tempGPU, xGPU);CHKERRCUSP(stat); 399 ierr = cusparseMatUp->reorderOut(xGPU);CHKERRCUSP(ierr); 400 401 ierr = VecCUSPRestoreArrayRead(bb,&bGPU);CHKERRQ(ierr); 402 ierr = VecCUSPRestoreArrayWrite(xx,&xGPU);CHKERRQ(ierr); 403 ierr = WaitForGPU();CHKERRCUSP(ierr); 404 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 409 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatSolve_SeqAIJCUSPARSE_NaturalOrdering" 413 PetscErrorCode MatSolve_SeqAIJCUSPARSE_NaturalOrdering(Mat A,Vec bb,Vec xx) 414 { 415 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 416 PetscErrorCode ierr; 417 CUSPARRAY *xGPU, *bGPU; 418 cusparseStatus_t stat; 419 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 420 GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; 421 GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; 422 CUSPARRAY * tempGPU = (CUSPARRAY*) cusparseTriFactors->tempvec; 423 424 PetscFunctionBegin; 425 /* Get the GPU pointers */ 426 ierr = VecCUSPGetArrayWrite(xx,&xGPU);CHKERRQ(ierr); 427 ierr = VecCUSPGetArrayRead(bb,&bGPU);CHKERRQ(ierr); 428 429 /* solve */ 430 stat = cusparseMatLo->solve(bGPU, tempGPU);CHKERRCUSP(stat); 431 stat = cusparseMatUp->solve(tempGPU, xGPU);CHKERRCUSP(stat); 432 433 ierr = VecCUSPRestoreArrayRead(bb,&bGPU);CHKERRQ(ierr); 434 ierr = VecCUSPRestoreArrayWrite(xx,&xGPU);CHKERRQ(ierr); 435 ierr = WaitForGPU();CHKERRCUSP(ierr); 436 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatSeqAIJCUSPARSECopyToGPU" 442 PetscErrorCode MatSeqAIJCUSPARSECopyToGPU(Mat A) 443 { 444 445 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; 446 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 447 PetscInt m = A->rmap->n,*ii,*ridx; 448 PetscErrorCode ierr; 449 450 451 PetscFunctionBegin; 452 if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU){ 453 ierr = PetscLogEventBegin(MAT_CUSPARSECopyToGPU,A,0,0,0);CHKERRQ(ierr); 454 /* 455 It may be possible to reuse nonzero structure with new matrix values but 456 for simplicity and insured correctness we delete and build a new matrix on 457 the GPU. Likely a very small performance hit. 458 */ 459 if (cusparseMat->mat){ 460 try { 461 delete cusparseMat->mat; 462 if (cusparseMat->tempvec) 463 delete cusparseMat->tempvec; 464 465 } catch(char* ex) { 466 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 467 } 468 } 469 try { 470 cusparseMat->nonzerorow=0; 471 for (int j = 0; j<m; j++) 472 cusparseMat->nonzerorow += ((a->i[j+1]-a->i[j])>0); 473 474 if (a->compressedrow.use) { 475 m = a->compressedrow.nrows; 476 ii = a->compressedrow.i; 477 ridx = a->compressedrow.rindex; 478 } else { 479 /* Forcing compressed row on the GPU ... only relevant for CSR storage */ 480 int k=0; 481 ierr = PetscMalloc((cusparseMat->nonzerorow+1)*sizeof(PetscInt), &ii);CHKERRQ(ierr); 482 ierr = PetscMalloc((cusparseMat->nonzerorow)*sizeof(PetscInt), &ridx);CHKERRQ(ierr); 483 ii[0]=0; 484 for (int j = 0; j<m; j++) { 485 if ((a->i[j+1]-a->i[j])>0) { 486 ii[k] = a->i[j]; 487 ridx[k]= j; 488 k++; 489 } 490 } 491 ii[cusparseMat->nonzerorow] = a->nz; 492 m = cusparseMat->nonzerorow; 493 } 494 495 /* Build our matrix ... first determine the GPU storage type */ 496 cusparseMat->mat = GPU_Matrix_Factory::getNew(MatCUSPARSEStorageFormats[cusparseMat->format]); 497 498 /* Create the streams and events (if desired). */ 499 PetscMPIInt size; 500 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 501 ierr = cusparseMat->mat->buildStreamsAndEvents(size, &theBodyStream);CHKERRCUSP(ierr); 502 503 /* FILL MODE UPPER is irrelevant */ 504 cusparseStatus_t stat = cusparseMat->mat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); 505 506 /* lastly, build the matrix */ 507 ierr = cusparseMat->mat->setMatrix(m, A->cmap->n, a->nz, ii, a->j, a->a);CHKERRCUSP(ierr); 508 cusparseMat->mat->setCPRowIndices(ridx, m); 509 if (!a->compressedrow.use) { 510 ierr = PetscFree(ii);CHKERRQ(ierr); 511 ierr = PetscFree(ridx);CHKERRQ(ierr); 512 } 513 cusparseMat->tempvec = new CUSPARRAY; 514 cusparseMat->tempvec->resize(m); 515 } catch(char* ex) { 516 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 517 } 518 ierr = WaitForGPU();CHKERRCUSP(ierr); 519 A->valid_GPU_matrix = PETSC_CUSP_BOTH; 520 ierr = PetscLogEventEnd(MAT_CUSPARSECopyToGPU,A,0,0,0);CHKERRQ(ierr); 521 } 522 PetscFunctionReturn(0); 523 } 524 525 #undef __FUNCT__ 526 #define __FUNCT__ "MatGetVecs_SeqAIJCUSPARSE" 527 PetscErrorCode MatGetVecs_SeqAIJCUSPARSE(Mat mat, Vec *right, Vec *left) 528 { 529 PetscErrorCode ierr; 530 531 PetscFunctionBegin; 532 533 if (right) { 534 ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr); 535 ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 536 ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr); 537 ierr = VecSetType(*right,VECSEQCUSP);CHKERRQ(ierr); 538 ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr); 539 } 540 if (left) { 541 ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr); 542 ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 543 ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr); 544 ierr = VecSetType(*left,VECSEQCUSP);CHKERRQ(ierr); 545 ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr); 546 } 547 PetscFunctionReturn(0); 548 } 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "MatMult_SeqAIJCUSPARSE" 552 PetscErrorCode MatMult_SeqAIJCUSPARSE(Mat A,Vec xx,Vec yy) 553 { 554 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 555 PetscErrorCode ierr; 556 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr; 557 CUSPARRAY *xarray,*yarray; 558 559 PetscFunctionBegin; 560 /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE 561 ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ 562 ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); 563 ierr = VecCUSPGetArrayWrite(yy,&yarray);CHKERRQ(ierr); 564 try { 565 ierr = cusparseMat->mat->multiply(xarray, yarray);CHKERRCUSP(ierr); 566 } catch (char* ex) { 567 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 568 } 569 ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr); 570 ierr = VecCUSPRestoreArrayWrite(yy,&yarray);CHKERRQ(ierr); 571 if (!cusparseMat->mat->hasNonZeroStream()) { 572 ierr = WaitForGPU();CHKERRCUSP(ierr); 573 } 574 ierr = PetscLogFlops(2.0*a->nz - cusparseMat->nonzerorow);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatMultTranspose_SeqAIJCUSPARSE" 581 PetscErrorCode MatMultTranspose_SeqAIJCUSPARSE(Mat A,Vec xx,Vec yy) 582 { 583 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 584 PetscErrorCode ierr; 585 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr; 586 CUSPARRAY *xarray,*yarray; 587 588 PetscFunctionBegin; 589 /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE 590 ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ 591 ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); 592 ierr = VecCUSPGetArrayWrite(yy,&yarray);CHKERRQ(ierr); 593 try { 594 #if !defined(PETSC_USE_COMPLEX) 595 ierr = cusparseMat->mat->multiply(xarray, yarray, TRANSPOSE);CHKERRCUSP(ierr); 596 #else 597 ierr = cusparseMat->mat->multiply(xarray, yarray, HERMITIAN);CHKERRCUSP(ierr); 598 #endif 599 } catch (char* ex) { 600 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 601 } 602 ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr); 603 ierr = VecCUSPRestoreArrayWrite(yy,&yarray);CHKERRQ(ierr); 604 if (!cusparseMat->mat->hasNonZeroStream()) { 605 ierr = WaitForGPU();CHKERRCUSP(ierr); 606 } 607 ierr = PetscLogFlops(2.0*a->nz - cusparseMat->nonzerorow);CHKERRQ(ierr); 608 PetscFunctionReturn(0); 609 } 610 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatMultAdd_SeqAIJCUSPARSE" 613 PetscErrorCode MatMultAdd_SeqAIJCUSPARSE(Mat A,Vec xx,Vec yy,Vec zz) 614 { 615 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 616 PetscErrorCode ierr; 617 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr; 618 CUSPARRAY *xarray,*yarray,*zarray; 619 PetscFunctionBegin; 620 /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE 621 ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ 622 try { 623 ierr = VecCopy_SeqCUSP(yy,zz);CHKERRQ(ierr); 624 ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); 625 ierr = VecCUSPGetArrayRead(yy,&yarray);CHKERRQ(ierr); 626 ierr = VecCUSPGetArrayWrite(zz,&zarray);CHKERRQ(ierr); 627 628 /* multiply add */ 629 ierr = cusparseMat->mat->multiplyAdd(xarray, zarray);CHKERRCUSP(ierr); 630 631 ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr); 632 ierr = VecCUSPRestoreArrayRead(yy,&yarray);CHKERRQ(ierr); 633 ierr = VecCUSPRestoreArrayWrite(zz,&zarray);CHKERRQ(ierr); 634 635 } catch(char* ex) { 636 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 637 } 638 ierr = WaitForGPU();CHKERRCUSP(ierr); 639 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 640 PetscFunctionReturn(0); 641 } 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatMultAdd_SeqAIJCUSPARSE" 645 PetscErrorCode MatMultTransposeAdd_SeqAIJCUSPARSE(Mat A,Vec xx,Vec yy,Vec zz) 646 { 647 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 648 PetscErrorCode ierr; 649 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr; 650 CUSPARRAY *xarray,*yarray,*zarray; 651 PetscFunctionBegin; 652 /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE 653 ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ 654 try { 655 ierr = VecCopy_SeqCUSP(yy,zz);CHKERRQ(ierr); 656 ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); 657 ierr = VecCUSPGetArrayRead(yy,&yarray);CHKERRQ(ierr); 658 ierr = VecCUSPGetArrayWrite(zz,&zarray);CHKERRQ(ierr); 659 660 /* multiply add with matrix transpose */ 661 #if !defined(PETSC_USE_COMPLEX) 662 ierr = cusparseMat->mat->multiplyAdd(xarray, yarray, TRANSPOSE);CHKERRCUSP(ierr); 663 #else 664 ierr = cusparseMat->mat->multiplyAdd(xarray, yarray, HERMITIAN);CHKERRCUSP(ierr); 665 #endif 666 667 ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr); 668 ierr = VecCUSPRestoreArrayRead(yy,&yarray);CHKERRQ(ierr); 669 ierr = VecCUSPRestoreArrayWrite(zz,&zarray);CHKERRQ(ierr); 670 671 } catch(char* ex) { 672 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 673 } 674 ierr = WaitForGPU();CHKERRCUSP(ierr); 675 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatAssemblyEnd_SeqAIJCUSPARSE" 681 PetscErrorCode MatAssemblyEnd_SeqAIJCUSPARSE(Mat A,MatAssemblyType mode) 682 { 683 PetscErrorCode ierr; 684 PetscFunctionBegin; 685 ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 686 ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); 687 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 688 A->ops->mult = MatMult_SeqAIJCUSPARSE; 689 A->ops->multadd = MatMultAdd_SeqAIJCUSPARSE; 690 A->ops->multtranspose = MatMultTranspose_SeqAIJCUSPARSE; 691 A->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJCUSPARSE; 692 PetscFunctionReturn(0); 693 } 694 695 /* --------------------------------------------------------------------------------*/ 696 #undef __FUNCT__ 697 #define __FUNCT__ "MatCreateSeqAIJCUSPARSE" 698 /*@ 699 MatCreateSeqAIJCUSPARSE - Creates a sparse matrix in AIJ (compressed row) format 700 (the default parallel PETSc format). This matrix will ultimately pushed down 701 to NVidia GPUs and use the CUSPARSE library for calculations. For good matrix 702 assembly performance the user should preallocate the matrix storage by setting 703 the parameter nz (or the array nnz). By setting these parameters accurately, 704 performance during matrix assembly can be increased by more than a factor of 50. 705 706 Collective on MPI_Comm 707 708 Input Parameters: 709 + comm - MPI communicator, set to PETSC_COMM_SELF 710 . m - number of rows 711 . n - number of columns 712 . nz - number of nonzeros per row (same for all rows) 713 - nnz - array containing the number of nonzeros in the various rows 714 (possibly different for each row) or PETSC_NULL 715 716 Output Parameter: 717 . A - the matrix 718 719 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 720 MatXXXXSetPreallocation() paradgm instead of this routine directly. 721 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 722 723 Notes: 724 If nnz is given then nz is ignored 725 726 The AIJ format (also called the Yale sparse matrix format or 727 compressed row storage), is fully compatible with standard Fortran 77 728 storage. That is, the stored row and column indices can begin at 729 either one (as in Fortran) or zero. See the users' manual for details. 730 731 Specify the preallocated storage with either nz or nnz (not both). 732 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 733 allocation. For large problems you MUST preallocate memory or you 734 will get TERRIBLE performance, see the users' manual chapter on matrices. 735 736 By default, this format uses inodes (identical nodes) when possible, to 737 improve numerical efficiency of matrix-vector products and solves. We 738 search for consecutive rows with the same nonzero structure, thereby 739 reusing matrix information to achieve increased efficiency. 740 741 Level: intermediate 742 743 .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATSEQAIJCUSPARSE, MATAIJCUSPARSE 744 @*/ 745 PetscErrorCode MatCreateSeqAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 746 { 747 PetscErrorCode ierr; 748 749 PetscFunctionBegin; 750 ierr = MatCreate(comm,A);CHKERRQ(ierr); 751 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 752 ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr); 753 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatDestroy_SeqAIJCUSPARSE" 759 PetscErrorCode MatDestroy_SeqAIJCUSPARSE(Mat A) 760 { 761 PetscErrorCode ierr; 762 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; 763 764 PetscFunctionBegin; 765 if (A->factortype==MAT_FACTOR_NONE) { 766 try { 767 if (A->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED){ 768 delete (GPU_Matrix_Ifc *)(cusparseMat->mat); 769 } 770 if (cusparseMat->tempvec!=0) 771 delete cusparseMat->tempvec; 772 delete cusparseMat; 773 A->valid_GPU_matrix = PETSC_CUSP_UNALLOCATED; 774 } catch(char* ex) { 775 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 776 } 777 } else { 778 /* The triangular factors */ 779 try { 780 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 781 GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; 782 GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; 783 delete (GPU_Matrix_Ifc *)cusparseMatLo; 784 delete (GPU_Matrix_Ifc *)cusparseMatUp; 785 delete (CUSPARRAY*) cusparseTriFactors->tempvec; 786 delete cusparseTriFactors; 787 } catch(char* ex) { 788 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 789 } 790 } 791 if (MAT_cusparseHandle) { 792 cusparseStatus_t stat; 793 stat = cusparseDestroy(MAT_cusparseHandle);CHKERRCUSP(stat); 794 MAT_cusparseHandle=0; 795 } 796 /*this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 797 A->spptr = 0; 798 799 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 800 PetscFunctionReturn(0); 801 } 802 803 EXTERN_C_BEGIN 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatCreate_SeqAIJCUSPARSE" 806 PetscErrorCode MatCreate_SeqAIJCUSPARSE(Mat B) 807 { 808 PetscErrorCode ierr; 809 810 PetscFunctionBegin; 811 ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 812 if (B->factortype==MAT_FACTOR_NONE) { 813 /* you cannot check the inode.use flag here since the matrix was just created. 814 now build a GPU matrix data structure */ 815 B->spptr = new Mat_SeqAIJCUSPARSE; 816 ((Mat_SeqAIJCUSPARSE *)B->spptr)->mat = 0; 817 ((Mat_SeqAIJCUSPARSE *)B->spptr)->tempvec = 0; 818 ((Mat_SeqAIJCUSPARSE *)B->spptr)->format = MAT_CUSPARSE_CSR; 819 } else { 820 /* NEXT, set the pointers to the triangular factors */ 821 B->spptr = new Mat_SeqAIJCUSPARSETriFactors; 822 ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->loTriFactorPtr = 0; 823 ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->upTriFactorPtr = 0; 824 ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->tempvec = 0; 825 ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->format = cusparseMatSolveStorageFormat; 826 } 827 /* Create a single instance of the MAT_cusparseHandle for any matrix (matMult, TriSolve, ...) */ 828 if (!MAT_cusparseHandle) { 829 cusparseStatus_t stat; 830 stat = cusparseCreate(&MAT_cusparseHandle);CHKERRCUSP(stat); 831 } 832 /* Here we overload MatGetFactor_petsc_C which enables -mat_type aijcusparse to use the 833 default cusparse tri solve. Note the difference with the implementation in 834 MatCreate_SeqAIJCUSP in ../seqcusp/aijcusp.cu */ 835 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqaij_cusparse",MatGetFactor_seqaij_cusparse);CHKERRQ(ierr); 836 B->ops->assemblyend = MatAssemblyEnd_SeqAIJCUSPARSE; 837 B->ops->destroy = MatDestroy_SeqAIJCUSPARSE; 838 B->ops->getvecs = MatGetVecs_SeqAIJCUSPARSE; 839 B->ops->setfromoptions = MatSetFromOptions_SeqAIJCUSPARSE; 840 B->ops->mult = MatMult_SeqAIJCUSPARSE; 841 B->ops->multadd = MatMultAdd_SeqAIJCUSPARSE; 842 B->ops->multtranspose = MatMultTranspose_SeqAIJCUSPARSE; 843 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJCUSPARSE; 844 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCUSPARSE);CHKERRQ(ierr); 845 B->valid_GPU_matrix = PETSC_CUSP_UNALLOCATED; 846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B, "MatCUSPARSESetFormat_C", "MatCUSPARSESetFormat_SeqAIJCUSPARSE", MatCUSPARSESetFormat_SeqAIJCUSPARSE);CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 EXTERN_C_END 850 851 /*M 852 MATSEQAIJCUSPARSE - MATAIJCUSPARSE = "(seq)aijcusparse" - A matrix type to be used for sparse matrices. 853 854 A matrix type type whose data resides on Nvidia GPUs. These matrices can be in either 855 CSR, ELL, or Hybrid format. All matrix calculations are performed on Nvidia GPUs using 856 the CUSPARSE library. This type is only available when using the 'txpetscgpu' package. 857 Use --download-txpetscgpu to build/install PETSc to use different CUSPARSE library and 858 the different GPU storage formats. 859 860 Options Database Keys: 861 + -mat_type aijcusparse - sets the matrix type to "seqaijcusparse" during a call to MatSetFromOptions() 862 . -mat_cusparse_storage_format csr (ell (ellpack) or hyb (hybrid)) sets the storage format of matrices (for MatMult and factors in MatSolve) during a call to MatSetFromOptions(). Only available with 'txpetscgpu' package. 863 . -mat_cusparse_mult_storage_format csr (ell (ellpack) or hyb (hybrid)) sets the storage format of matrices (for MatMult) during a call to MatSetFromOptions(). Only available with 'txpetscgpu' package. 864 - -mat_cusparse_solve_storage_format csr (ell (ellpack) or hyb (hybrid)) sets the storage format matrices (for factors in MatSolve) during a call to MatSetFromOptions(). Only available with 'txpetscgpu' package. 865 866 Level: beginner 867 868 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE, MatCUSPARSESetFormat(), MatCUSPARSEStorageFormat, MatCUSPARSEFormatOperation 869 M*/ 870