1 /* 2 Defines the basic matrix operations for the AIJ (compressed row) 3 matrix storage format. 4 */ 5 6 #include "petscconf.h" 7 PETSC_CUDA_EXTERN_C_BEGIN 8 #include "../src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 9 //#include "petscbt.h" 10 #include "../src/vec/vec/impls/dvecimpl.h" 11 #include "petsc-private/vecimpl.h" 12 PETSC_CUDA_EXTERN_C_END 13 #undef VecType 14 #include "cusparsematimpl.h" 15 const char *const MatCUSPARSEStorageFormats[] = {"CSR","ELL","HYB","MatCUSPARSEStorageFormat","MAT_CUSPARSE_",0}; 16 17 /* this is such a hack ... but I don't know of another way to pass this variable 18 from one GPU_Matrix_Ifc class to another. This is necessary for the parallel 19 SpMV. Essentially, I need to use the same stream variable in two different 20 data structures. I do this by creating a single instance of that stream 21 and reuse it. */ 22 cudaStream_t theBodyStream=0; 23 24 PetscErrorCode MatILUFactorSymbolic_SeqAIJCUSPARSE(Mat,Mat,IS,IS,const MatFactorInfo*); 25 PetscErrorCode MatLUFactorSymbolic_SeqAIJCUSPARSE(Mat,Mat,IS,IS,const MatFactorInfo*); 26 PetscErrorCode MatLUFactorNumeric_SeqAIJCUSPARSE(Mat,Mat,const MatFactorInfo*); 27 PetscErrorCode MatSolve_SeqAIJCUSPARSE(Mat,Vec,Vec); 28 PetscErrorCode MatSolve_SeqAIJCUSPARSE_NaturalOrdering(Mat,Vec,Vec); 29 PetscErrorCode MatSetFromOptions_SeqAIJCUSPARSE(Mat); 30 PetscErrorCode MatSeqAIJCUSPARSEAnalysisAndCopyToGPU(Mat); 31 PetscErrorCode MatMult_SeqAIJCUSPARSE(Mat,Vec,Vec); 32 PetscErrorCode MatMultAdd_SeqAIJCUSPARSE(Mat,Vec,Vec,Vec); 33 PetscErrorCode MatMultTranspose_SeqAIJCUSPARSE(Mat,Vec,Vec); 34 PetscErrorCode MatMultTransposeAdd_SeqAIJCUSPARSE(Mat,Vec,Vec,Vec); 35 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_cusparse" 38 PetscErrorCode MatFactorGetSolverPackage_seqaij_cusparse(Mat A,const MatSolverPackage *type) 39 { 40 PetscFunctionBegin; 41 *type = MATSOLVERCUSPARSE; 42 PetscFunctionReturn(0); 43 } 44 45 EXTERN_C_BEGIN 46 extern PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 47 EXTERN_C_END 48 /* 49 MATSOLVERCUSPARSE = "cusparse" - A matrix type providing triangular solvers (ILU) for seq matrices 50 on the GPU of type, seqaijcusparse, aijcusparse, or seqaijcusp, aijcusp 51 52 Level: beginner 53 */ 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatGetFactor_seqaij_cusparse" 58 PetscErrorCode MatGetFactor_seqaij_cusparse(Mat A,MatFactorType ftype,Mat *B) 59 { 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 ierr = MatGetFactor_seqaij_petsc(A,ftype,B);CHKERRQ(ierr); 64 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 65 ierr = MatSetType(*B,MATSEQAIJCUSPARSE);CHKERRQ(ierr); 66 ierr = MatSetFromOptions_SeqAIJCUSPARSE(*B);CHKERRQ(ierr); 67 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*B),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_cusparse",MatFactorGetSolverPackage_seqaij_cusparse);CHKERRQ(ierr); 68 69 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJCUSPARSE; 70 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJCUSPARSE; 71 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported for CUSPARSE Matrix Types"); 72 (*B)->factortype = ftype; 73 PetscFunctionReturn(0); 74 } 75 EXTERN_C_END 76 77 EXTERN_C_BEGIN 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatCUSPARSESetFormat_SeqAIJCUSPARSE" 80 PetscErrorCode MatCUSPARSESetFormat_SeqAIJCUSPARSE(Mat A,MatCUSPARSEFormatOperation op,MatCUSPARSEStorageFormat format) 81 { 82 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; 83 84 PetscFunctionBegin; 85 switch (op) { 86 case MAT_CUSPARSE_MULT: 87 cusparseMat->format = format; 88 break; 89 case MAT_CUSPARSE_SOLVE: 90 cusparseMatSolveStorageFormat = format; 91 break; 92 case MAT_CUSPARSE_ALL: 93 cusparseMat->format = format; 94 cusparseMatSolveStorageFormat = format; 95 break; 96 default: 97 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unsupported operation %d for MatCUSPARSEFormatOperation. MAT_CUSPARSE_MULT, MAT_CUSPARSE_SOLVE, and MAT_CUSPARSE_ALL are currently supported.",op); 98 } 99 PetscFunctionReturn(0); 100 } 101 EXTERN_C_END 102 103 104 /*@ 105 MatCUSPARSESetFormat - Sets the storage format of CUSPARSE matrices for a particular 106 operation. Only the MatMult operation can use different GPU storage formats 107 for MPIAIJCUSPARSE matrices. This requires the txpetscgpu package. Use --download-txpetscgpu 108 to build/install PETSc to use this package. 109 110 Not Collective 111 112 Input Parameters: 113 + A - Matrix of type SEQAIJCUSPARSE 114 . op - MatCUSPARSEFormatOperation. SEQAIJCUSPARSE matrices support MAT_CUSPARSE_MULT, MAT_CUSPARSE_SOLVE, and MAT_CUSPARSE_ALL. MPIAIJCUSPARSE matrices support MAT_CUSPARSE_MULT_DIAG, MAT_CUSPARSE_MULT_OFFDIAG, and MAT_CUSPARSE_ALL. 115 - format - MatCUSPARSEStorageFormat (one of MAT_CUSPARSE_CSR, MAT_CUSPARSE_ELL, MAT_CUSPARSE_HYB) 116 117 Output Parameter: 118 119 Level: intermediate 120 121 .seealso: MatCUSPARSEStorageFormat, MatCUSPARSEFormatOperation 122 @*/ 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatCUSPARSESetFormat" 125 PetscErrorCode MatCUSPARSESetFormat(Mat A,MatCUSPARSEFormatOperation op,MatCUSPARSEStorageFormat format) 126 { 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 PetscValidHeaderSpecific(A, MAT_CLASSID,1); 131 ierr = PetscTryMethod(A, "MatCUSPARSESetFormat_C",(Mat,MatCUSPARSEFormatOperation,MatCUSPARSEStorageFormat),(A,op,format));CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatSetFromOptions_SeqAIJCUSPARSE" 137 PetscErrorCode MatSetFromOptions_SeqAIJCUSPARSE(Mat A) 138 { 139 PetscErrorCode ierr; 140 MatCUSPARSEStorageFormat format; 141 PetscBool flg; 142 143 PetscFunctionBegin; 144 ierr = PetscOptionsHead("SeqAIJCUSPARSE options");CHKERRQ(ierr); 145 ierr = PetscObjectOptionsBegin((PetscObject)A); 146 if (A->factortype==MAT_FACTOR_NONE) { 147 ierr = PetscOptionsEnum("-mat_cusparse_mult_storage_format","sets storage format of (seq)aijcusparse gpu matrices for SpMV", 148 "MatCUSPARSESetFormat",MatCUSPARSEStorageFormats,(PetscEnum)MAT_CUSPARSE_CSR,(PetscEnum*)&format,&flg);CHKERRQ(ierr); 149 if (flg) { 150 ierr = MatCUSPARSESetFormat(A,MAT_CUSPARSE_MULT,format);CHKERRQ(ierr); 151 } 152 } else { 153 ierr = PetscOptionsEnum("-mat_cusparse_solve_storage_format","sets storage format of (seq)aijcusparse gpu matrices for TriSolve", 154 "MatCUSPARSESetFormat",MatCUSPARSEStorageFormats,(PetscEnum)MAT_CUSPARSE_CSR,(PetscEnum*)&format,&flg);CHKERRQ(ierr); 155 if (flg) { 156 ierr = MatCUSPARSESetFormat(A,MAT_CUSPARSE_SOLVE,format);CHKERRQ(ierr); 157 } 158 } 159 ierr = PetscOptionsEnum("-mat_cusparse_storage_format","sets storage format of (seq)aijcusparse gpu matrices for SpMV and TriSolve", 160 "MatCUSPARSESetFormat",MatCUSPARSEStorageFormats,(PetscEnum)MAT_CUSPARSE_CSR,(PetscEnum*)&format,&flg);CHKERRQ(ierr); 161 if (flg) { 162 ierr = MatCUSPARSESetFormat(A,MAT_CUSPARSE_ALL,format);CHKERRQ(ierr); 163 } 164 ierr = PetscOptionsEnd();CHKERRQ(ierr); 165 PetscFunctionReturn(0); 166 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJCUSPARSE" 171 PetscErrorCode MatILUFactorSymbolic_SeqAIJCUSPARSE(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 172 { 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 ierr = MatILUFactorSymbolic_SeqAIJ(B,A,isrow,iscol,info);CHKERRQ(ierr); 177 178 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJCUSPARSE; 179 PetscFunctionReturn(0); 180 } 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJCUSPARSE" 184 PetscErrorCode MatLUFactorSymbolic_SeqAIJCUSPARSE(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 185 { 186 PetscErrorCode ierr; 187 188 PetscFunctionBegin; 189 ierr = MatLUFactorSymbolic_SeqAIJ(B,A,isrow,iscol,info);CHKERRQ(ierr); 190 191 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJCUSPARSE; 192 PetscFunctionReturn(0); 193 } 194 195 #undef __FUNCT__ 196 #define __FUNCT__ "MatSeqAIJCUSPARSEBuildLowerTriMatrix" 197 PetscErrorCode MatSeqAIJCUSPARSEBuildLowerTriMatrix(Mat A) 198 { 199 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 200 PetscInt n = A->rmap->n; 201 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 202 GPU_Matrix_Ifc * cusparseMat = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; 203 cusparseStatus_t stat; 204 const PetscInt *ai = a->i,*aj = a->j,*vi; 205 const MatScalar *aa = a->a,*v; 206 PetscErrorCode ierr; 207 PetscInt *AiLo, *AjLo; 208 PetscScalar *AALo; 209 PetscInt i,nz, nzLower, offset, rowOffset; 210 211 PetscFunctionBegin; 212 if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU) { 213 try { 214 /* first figure out the number of nonzeros in the lower triangular matrix including 1's on the diagonal. */ 215 nzLower=n+ai[n]-ai[1]; 216 217 /* Allocate Space for the lower triangular matrix */ 218 ierr = cudaMallocHost((void**) &AiLo, (n+1)*sizeof(PetscInt));CHKERRCUSP(ierr); 219 ierr = cudaMallocHost((void**) &AjLo, nzLower*sizeof(PetscInt));CHKERRCUSP(ierr); 220 ierr = cudaMallocHost((void**) &AALo, nzLower*sizeof(PetscScalar));CHKERRCUSP(ierr); 221 222 /* Fill the lower triangular matrix */ 223 AiLo[0] = (PetscInt) 0; 224 AiLo[n] = nzLower; 225 AjLo[0] = (PetscInt) 0; 226 AALo[0] = (MatScalar) 1.0; 227 v = aa; 228 vi = aj; 229 offset = 1; 230 rowOffset= 1; 231 for (i=1; i<n; i++) { 232 nz = ai[i+1] - ai[i]; 233 /* additional 1 for the term on the diagonal */ 234 AiLo[i] = rowOffset; 235 rowOffset += nz+1; 236 237 ierr = PetscMemcpy(&(AjLo[offset]), vi, nz*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = PetscMemcpy(&(AALo[offset]), v, nz*sizeof(PetscScalar));CHKERRQ(ierr); 239 240 offset += nz; 241 AjLo[offset] = (PetscInt) i; 242 AALo[offset] = (MatScalar) 1.0; 243 offset += 1; 244 245 v += nz; 246 vi += nz; 247 } 248 cusparseMat = GPU_Matrix_Factory::getNew(MatCUSPARSEStorageFormats[cusparseTriFactors->format]); 249 250 stat = cusparseMat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_LOWER);CHKERRCUSP(stat); 251 ierr = cusparseMat->setMatrix(n, n, nzLower, AiLo, AjLo, AALo);CHKERRCUSP(ierr); 252 stat = cusparseMat->solveAnalysis();CHKERRCUSP(stat); 253 254 ((Mat_SeqAIJCUSPARSETriFactors*)A->spptr)->loTriFactorPtr = cusparseMat; 255 256 ierr = cudaFreeHost(AiLo);CHKERRCUSP(ierr); 257 ierr = cudaFreeHost(AjLo);CHKERRCUSP(ierr); 258 ierr = cudaFreeHost(AALo);CHKERRCUSP(ierr); 259 } catch(char* ex) { 260 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 261 } 262 } 263 PetscFunctionReturn(0); 264 } 265 266 #undef __FUNCT__ 267 #define __FUNCT__ "MatSeqAIJCUSPARSEBuildUpperTriMatrix" 268 PetscErrorCode MatSeqAIJCUSPARSEBuildUpperTriMatrix(Mat A) 269 { 270 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 271 PetscInt n = A->rmap->n; 272 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 273 GPU_Matrix_Ifc * cusparseMat = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; 274 cusparseStatus_t stat; 275 const PetscInt *aj = a->j,*adiag = a->diag,*vi; 276 const MatScalar *aa = a->a,*v; 277 PetscInt *AiUp, *AjUp; 278 PetscScalar *AAUp; 279 PetscInt i,nz, nzUpper, offset; 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU) { 284 try { 285 /* next, figure out the number of nonzeros in the upper triangular matrix. */ 286 nzUpper = adiag[0]-adiag[n]; 287 288 /* Allocate Space for the upper triangular matrix */ 289 ierr = cudaMallocHost((void**) &AiUp, (n+1)*sizeof(PetscInt));CHKERRCUSP(ierr); 290 ierr = cudaMallocHost((void**) &AjUp, nzUpper*sizeof(PetscInt));CHKERRCUSP(ierr); 291 ierr = cudaMallocHost((void**) &AAUp, nzUpper*sizeof(PetscScalar));CHKERRCUSP(ierr); 292 293 /* Fill the upper triangular matrix */ 294 AiUp[0]=(PetscInt) 0; 295 AiUp[n]=nzUpper; 296 offset = nzUpper; 297 for (i=n-1; i>=0; i--) { 298 v = aa + adiag[i+1] + 1; 299 vi = aj + adiag[i+1] + 1; 300 301 /* number of elements NOT on the diagonal */ 302 nz = adiag[i] - adiag[i+1]-1; 303 304 /* decrement the offset */ 305 offset -= (nz+1); 306 307 /* first, set the diagonal elements */ 308 AjUp[offset] = (PetscInt) i; 309 AAUp[offset] = 1./v[nz]; 310 AiUp[i] = AiUp[i+1] - (nz+1); 311 312 ierr = PetscMemcpy(&(AjUp[offset+1]), vi, nz*sizeof(PetscInt));CHKERRQ(ierr); 313 ierr = PetscMemcpy(&(AAUp[offset+1]), v, nz*sizeof(PetscScalar));CHKERRQ(ierr); 314 } 315 cusparseMat = GPU_Matrix_Factory::getNew(MatCUSPARSEStorageFormats[cusparseTriFactors->format]); 316 317 stat = cusparseMat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); 318 ierr = cusparseMat->setMatrix(n, n, nzUpper, AiUp, AjUp, AAUp);CHKERRCUSP(ierr); 319 stat = cusparseMat->solveAnalysis();CHKERRCUSP(stat); 320 321 ((Mat_SeqAIJCUSPARSETriFactors*)A->spptr)->upTriFactorPtr = cusparseMat; 322 323 ierr = cudaFreeHost(AiUp);CHKERRCUSP(ierr); 324 ierr = cudaFreeHost(AjUp);CHKERRCUSP(ierr); 325 ierr = cudaFreeHost(AAUp);CHKERRCUSP(ierr); 326 } catch(char* ex) { 327 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 328 } 329 } 330 PetscFunctionReturn(0); 331 } 332 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatSeqAIJCUSPARSEAnalysisAndCopyToGPU" 335 PetscErrorCode MatSeqAIJCUSPARSEAnalysisAndCopyToGPU(Mat A) 336 { 337 PetscErrorCode ierr; 338 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 339 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 340 IS isrow = a->row,iscol = a->icol; 341 PetscBool row_identity,col_identity; 342 const PetscInt *r,*c; 343 PetscInt n = A->rmap->n; 344 345 PetscFunctionBegin; 346 ierr = MatSeqAIJCUSPARSEBuildLowerTriMatrix(A);CHKERRQ(ierr); 347 ierr = MatSeqAIJCUSPARSEBuildUpperTriMatrix(A);CHKERRQ(ierr); 348 349 cusparseTriFactors->tempvec = new CUSPARRAY; 350 cusparseTriFactors->tempvec->resize(n); 351 352 A->valid_GPU_matrix = PETSC_CUSP_BOTH; 353 /*lower triangular indices */ 354 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 355 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 356 if (!row_identity) { 357 ierr = cusparseTriFactors->loTriFactorPtr->setOrdIndices(r, n);CHKERRCUSP(ierr); 358 } 359 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 360 361 /*upper triangular indices */ 362 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 363 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 364 if (!col_identity) { 365 ierr = cusparseTriFactors->upTriFactorPtr->setOrdIndices(c, n);CHKERRCUSP(ierr); 366 } 367 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 368 PetscFunctionReturn(0); 369 } 370 371 #undef __FUNCT__ 372 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJCUSPARSE" 373 PetscErrorCode MatLUFactorNumeric_SeqAIJCUSPARSE(Mat B,Mat A,const MatFactorInfo *info) 374 { 375 PetscErrorCode ierr; 376 Mat_SeqAIJ *b =(Mat_SeqAIJ*)B->data; 377 IS isrow = b->row,iscol = b->col; 378 PetscBool row_identity,col_identity; 379 380 PetscFunctionBegin; 381 ierr = MatLUFactorNumeric_SeqAIJ(B,A,info);CHKERRQ(ierr); 382 /* determine which version of MatSolve needs to be used. */ 383 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 384 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 385 if (row_identity && col_identity) B->ops->solve = MatSolve_SeqAIJCUSPARSE_NaturalOrdering; 386 else B->ops->solve = MatSolve_SeqAIJCUSPARSE; 387 388 /* get the triangular factors */ 389 ierr = MatSeqAIJCUSPARSEAnalysisAndCopyToGPU(B);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 394 395 #undef __FUNCT__ 396 #define __FUNCT__ "MatSolve_SeqAIJCUSPARSE" 397 PetscErrorCode MatSolve_SeqAIJCUSPARSE(Mat A,Vec bb,Vec xx) 398 { 399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 400 PetscErrorCode ierr; 401 CUSPARRAY *xGPU=PETSC_NULL, *bGPU=PETSC_NULL; 402 cusparseStatus_t stat; 403 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 404 GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; 405 GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; 406 CUSPARRAY * tempGPU = (CUSPARRAY*) cusparseTriFactors->tempvec; 407 408 PetscFunctionBegin; 409 /* Get the GPU pointers */ 410 ierr = VecCUSPGetArrayWrite(xx,&xGPU);CHKERRQ(ierr); 411 ierr = VecCUSPGetArrayRead(bb,&bGPU);CHKERRQ(ierr); 412 413 /* solve with reordering */ 414 ierr = cusparseMatLo->reorderIn(xGPU, bGPU);CHKERRCUSP(ierr); 415 stat = cusparseMatLo->solve(xGPU, tempGPU);CHKERRCUSP(stat); 416 stat = cusparseMatUp->solve(tempGPU, xGPU);CHKERRCUSP(stat); 417 ierr = cusparseMatUp->reorderOut(xGPU);CHKERRCUSP(ierr); 418 419 ierr = VecCUSPRestoreArrayRead(bb,&bGPU);CHKERRQ(ierr); 420 ierr = VecCUSPRestoreArrayWrite(xx,&xGPU);CHKERRQ(ierr); 421 ierr = WaitForGPU();CHKERRCUSP(ierr); 422 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 427 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatSolve_SeqAIJCUSPARSE_NaturalOrdering" 431 PetscErrorCode MatSolve_SeqAIJCUSPARSE_NaturalOrdering(Mat A,Vec bb,Vec xx) 432 { 433 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 434 PetscErrorCode ierr; 435 CUSPARRAY *xGPU=PETSC_NULL, *bGPU=PETSC_NULL; 436 cusparseStatus_t stat; 437 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 438 GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; 439 GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; 440 CUSPARRAY * tempGPU = (CUSPARRAY*) cusparseTriFactors->tempvec; 441 442 PetscFunctionBegin; 443 /* Get the GPU pointers */ 444 ierr = VecCUSPGetArrayWrite(xx,&xGPU);CHKERRQ(ierr); 445 ierr = VecCUSPGetArrayRead(bb,&bGPU);CHKERRQ(ierr); 446 447 /* solve */ 448 stat = cusparseMatLo->solve(bGPU, tempGPU);CHKERRCUSP(stat); 449 stat = cusparseMatUp->solve(tempGPU, xGPU);CHKERRCUSP(stat); 450 451 ierr = VecCUSPRestoreArrayRead(bb,&bGPU);CHKERRQ(ierr); 452 ierr = VecCUSPRestoreArrayWrite(xx,&xGPU);CHKERRQ(ierr); 453 ierr = WaitForGPU();CHKERRCUSP(ierr); 454 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatSeqAIJCUSPARSECopyToGPU" 460 PetscErrorCode MatSeqAIJCUSPARSECopyToGPU(Mat A) 461 { 462 463 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; 464 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 465 PetscInt m = A->rmap->n,*ii,*ridx; 466 PetscErrorCode ierr; 467 468 469 PetscFunctionBegin; 470 if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU) { 471 ierr = PetscLogEventBegin(MAT_CUSPARSECopyToGPU,A,0,0,0);CHKERRQ(ierr); 472 /* 473 It may be possible to reuse nonzero structure with new matrix values but 474 for simplicity and insured correctness we delete and build a new matrix on 475 the GPU. Likely a very small performance hit. 476 */ 477 if (cusparseMat->mat) { 478 try { 479 delete cusparseMat->mat; 480 if (cusparseMat->tempvec) delete cusparseMat->tempvec; 481 482 } catch(char *ex) { 483 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 484 } 485 } 486 try { 487 cusparseMat->nonzerorow=0; 488 for (int j = 0; j<m; j++) cusparseMat->nonzerorow += ((a->i[j+1]-a->i[j])>0); 489 490 if (a->compressedrow.use) { 491 m = a->compressedrow.nrows; 492 ii = a->compressedrow.i; 493 ridx = a->compressedrow.rindex; 494 } else { 495 /* Forcing compressed row on the GPU ... only relevant for CSR storage */ 496 int k=0; 497 ierr = PetscMalloc((cusparseMat->nonzerorow+1)*sizeof(PetscInt), &ii);CHKERRQ(ierr); 498 ierr = PetscMalloc((cusparseMat->nonzerorow)*sizeof(PetscInt), &ridx);CHKERRQ(ierr); 499 ii[0]=0; 500 for (int j = 0; j<m; j++) { 501 if ((a->i[j+1]-a->i[j])>0) { 502 ii[k] = a->i[j]; 503 ridx[k]= j; 504 k++; 505 } 506 } 507 ii[cusparseMat->nonzerorow] = a->nz; 508 509 m = cusparseMat->nonzerorow; 510 } 511 512 /* Build our matrix ... first determine the GPU storage type */ 513 cusparseMat->mat = GPU_Matrix_Factory::getNew(MatCUSPARSEStorageFormats[cusparseMat->format]); 514 515 /* Create the streams and events (if desired). */ 516 PetscMPIInt size; 517 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 518 ierr = cusparseMat->mat->buildStreamsAndEvents(size, &theBodyStream);CHKERRCUSP(ierr); 519 520 /* FILL MODE UPPER is irrelevant */ 521 cusparseStatus_t stat = cusparseMat->mat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); 522 523 /* lastly, build the matrix */ 524 ierr = cusparseMat->mat->setMatrix(m, A->cmap->n, a->nz, ii, a->j, a->a);CHKERRCUSP(ierr); 525 cusparseMat->mat->setCPRowIndices(ridx, m); 526 if (!a->compressedrow.use) { 527 ierr = PetscFree(ii);CHKERRQ(ierr); 528 ierr = PetscFree(ridx);CHKERRQ(ierr); 529 } 530 cusparseMat->tempvec = new CUSPARRAY; 531 cusparseMat->tempvec->resize(m); 532 } catch(char *ex) { 533 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 534 } 535 ierr = WaitForGPU();CHKERRCUSP(ierr); 536 537 A->valid_GPU_matrix = PETSC_CUSP_BOTH; 538 539 ierr = PetscLogEventEnd(MAT_CUSPARSECopyToGPU,A,0,0,0);CHKERRQ(ierr); 540 } 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatGetVecs_SeqAIJCUSPARSE" 546 PetscErrorCode MatGetVecs_SeqAIJCUSPARSE(Mat mat, Vec *right, Vec *left) 547 { 548 PetscErrorCode ierr; 549 550 PetscFunctionBegin; 551 if (right) { 552 ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr); 553 ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 554 ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr); 555 ierr = VecSetType(*right,VECSEQCUSP);CHKERRQ(ierr); 556 ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr); 557 } 558 if (left) { 559 ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr); 560 ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 561 ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr); 562 ierr = VecSetType(*left,VECSEQCUSP);CHKERRQ(ierr); 563 ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr); 564 } 565 PetscFunctionReturn(0); 566 } 567 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatMult_SeqAIJCUSPARSE" 570 PetscErrorCode MatMult_SeqAIJCUSPARSE(Mat A,Vec xx,Vec yy) 571 { 572 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 573 PetscErrorCode ierr; 574 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; 575 CUSPARRAY *xarray =PETSC_NULL,*yarray=PETSC_NULL; 576 577 PetscFunctionBegin; 578 /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE 579 ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ 580 ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); 581 ierr = VecCUSPGetArrayWrite(yy,&yarray);CHKERRQ(ierr); 582 try { 583 ierr = cusparseMat->mat->multiply(xarray, yarray);CHKERRCUSP(ierr); 584 } catch (char *ex) { 585 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 586 } 587 ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr); 588 ierr = VecCUSPRestoreArrayWrite(yy,&yarray);CHKERRQ(ierr); 589 if (!cusparseMat->mat->hasNonZeroStream()) { 590 ierr = WaitForGPU();CHKERRCUSP(ierr); 591 } 592 ierr = PetscLogFlops(2.0*a->nz - cusparseMat->nonzerorow);CHKERRQ(ierr); 593 PetscFunctionReturn(0); 594 } 595 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultTranspose_SeqAIJCUSPARSE" 599 PetscErrorCode MatMultTranspose_SeqAIJCUSPARSE(Mat A,Vec xx,Vec yy) 600 { 601 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 602 PetscErrorCode ierr; 603 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; 604 CUSPARRAY *xarray =PETSC_NULL,*yarray=PETSC_NULL; 605 606 PetscFunctionBegin; 607 /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE 608 ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ 609 ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); 610 ierr = VecCUSPGetArrayWrite(yy,&yarray);CHKERRQ(ierr); 611 try { 612 #if !defined(PETSC_USE_COMPLEX) 613 ierr = cusparseMat->mat->multiply(xarray, yarray, TRANSPOSE);CHKERRCUSP(ierr); 614 #else 615 ierr = cusparseMat->mat->multiply(xarray, yarray, HERMITIAN);CHKERRCUSP(ierr); 616 #endif 617 } catch (char *ex) { 618 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 619 } 620 ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr); 621 ierr = VecCUSPRestoreArrayWrite(yy,&yarray);CHKERRQ(ierr); 622 if (!cusparseMat->mat->hasNonZeroStream()) { 623 ierr = WaitForGPU();CHKERRCUSP(ierr); 624 } 625 ierr = PetscLogFlops(2.0*a->nz - cusparseMat->nonzerorow);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 #undef __FUNCT__ 630 #define __FUNCT__ "MatMultAdd_SeqAIJCUSPARSE" 631 PetscErrorCode MatMultAdd_SeqAIJCUSPARSE(Mat A,Vec xx,Vec yy,Vec zz) 632 { 633 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 634 PetscErrorCode ierr; 635 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; 636 CUSPARRAY *xarray = PETSC_NULL,*yarray=PETSC_NULL,*zarray=PETSC_NULL; 637 638 PetscFunctionBegin; 639 /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE 640 ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ 641 try { 642 ierr = VecCopy_SeqCUSP(yy,zz);CHKERRQ(ierr); 643 ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); 644 ierr = VecCUSPGetArrayRead(yy,&yarray);CHKERRQ(ierr); 645 ierr = VecCUSPGetArrayWrite(zz,&zarray);CHKERRQ(ierr); 646 647 /* multiply add */ 648 ierr = cusparseMat->mat->multiplyAdd(xarray, zarray);CHKERRCUSP(ierr); 649 650 ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr); 651 ierr = VecCUSPRestoreArrayRead(yy,&yarray);CHKERRQ(ierr); 652 ierr = VecCUSPRestoreArrayWrite(zz,&zarray);CHKERRQ(ierr); 653 654 } catch(char *ex) { 655 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 656 } 657 ierr = WaitForGPU();CHKERRCUSP(ierr); 658 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatMultAdd_SeqAIJCUSPARSE" 664 PetscErrorCode MatMultTransposeAdd_SeqAIJCUSPARSE(Mat A,Vec xx,Vec yy,Vec zz) 665 { 666 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 667 PetscErrorCode ierr; 668 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; 669 CUSPARRAY *xarray,*yarray,*zarray; 670 671 PetscFunctionBegin; 672 /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE 673 ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ 674 try { 675 ierr = VecCopy_SeqCUSP(yy,zz);CHKERRQ(ierr); 676 ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); 677 ierr = VecCUSPGetArrayRead(yy,&yarray);CHKERRQ(ierr); 678 ierr = VecCUSPGetArrayWrite(zz,&zarray);CHKERRQ(ierr); 679 680 /* multiply add with matrix transpose */ 681 #if !defined(PETSC_USE_COMPLEX) 682 ierr = cusparseMat->mat->multiplyAdd(xarray, yarray, TRANSPOSE);CHKERRCUSP(ierr); 683 #else 684 ierr = cusparseMat->mat->multiplyAdd(xarray, yarray, HERMITIAN);CHKERRCUSP(ierr); 685 #endif 686 687 ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr); 688 ierr = VecCUSPRestoreArrayRead(yy,&yarray);CHKERRQ(ierr); 689 ierr = VecCUSPRestoreArrayWrite(zz,&zarray);CHKERRQ(ierr); 690 691 } catch(char *ex) { 692 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 693 } 694 ierr = WaitForGPU();CHKERRCUSP(ierr); 695 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatAssemblyEnd_SeqAIJCUSPARSE" 701 PetscErrorCode MatAssemblyEnd_SeqAIJCUSPARSE(Mat A,MatAssemblyType mode) 702 { 703 PetscErrorCode ierr; 704 705 PetscFunctionBegin; 706 ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 707 ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); 708 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 709 A->ops->mult = MatMult_SeqAIJCUSPARSE; 710 A->ops->multadd = MatMultAdd_SeqAIJCUSPARSE; 711 A->ops->multtranspose = MatMultTranspose_SeqAIJCUSPARSE; 712 A->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJCUSPARSE; 713 PetscFunctionReturn(0); 714 } 715 716 /* --------------------------------------------------------------------------------*/ 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatCreateSeqAIJCUSPARSE" 719 /*@ 720 MatCreateSeqAIJCUSPARSE - Creates a sparse matrix in AIJ (compressed row) format 721 (the default parallel PETSc format). This matrix will ultimately pushed down 722 to NVidia GPUs and use the CUSPARSE library for calculations. For good matrix 723 assembly performance the user should preallocate the matrix storage by setting 724 the parameter nz (or the array nnz). By setting these parameters accurately, 725 performance during matrix assembly can be increased by more than a factor of 50. 726 727 Collective on MPI_Comm 728 729 Input Parameters: 730 + comm - MPI communicator, set to PETSC_COMM_SELF 731 . m - number of rows 732 . n - number of columns 733 . nz - number of nonzeros per row (same for all rows) 734 - nnz - array containing the number of nonzeros in the various rows 735 (possibly different for each row) or PETSC_NULL 736 737 Output Parameter: 738 . A - the matrix 739 740 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 741 MatXXXXSetPreallocation() paradgm instead of this routine directly. 742 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 743 744 Notes: 745 If nnz is given then nz is ignored 746 747 The AIJ format (also called the Yale sparse matrix format or 748 compressed row storage), is fully compatible with standard Fortran 77 749 storage. That is, the stored row and column indices can begin at 750 either one (as in Fortran) or zero. See the users' manual for details. 751 752 Specify the preallocated storage with either nz or nnz (not both). 753 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 754 allocation. For large problems you MUST preallocate memory or you 755 will get TERRIBLE performance, see the users' manual chapter on matrices. 756 757 By default, this format uses inodes (identical nodes) when possible, to 758 improve numerical efficiency of matrix-vector products and solves. We 759 search for consecutive rows with the same nonzero structure, thereby 760 reusing matrix information to achieve increased efficiency. 761 762 Level: intermediate 763 764 .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATSEQAIJCUSPARSE, MATAIJCUSPARSE 765 @*/ 766 PetscErrorCode MatCreateSeqAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 767 { 768 PetscErrorCode ierr; 769 770 PetscFunctionBegin; 771 ierr = MatCreate(comm,A);CHKERRQ(ierr); 772 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 773 ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr); 774 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 775 PetscFunctionReturn(0); 776 } 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatDestroy_SeqAIJCUSPARSE" 780 PetscErrorCode MatDestroy_SeqAIJCUSPARSE(Mat A) 781 { 782 PetscErrorCode ierr; 783 Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; 784 785 PetscFunctionBegin; 786 if (A->factortype==MAT_FACTOR_NONE) { 787 try { 788 if (A->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) { 789 delete (GPU_Matrix_Ifc*)(cusparseMat->mat); 790 } 791 if (cusparseMat->tempvec!=0) delete cusparseMat->tempvec; 792 delete cusparseMat; 793 A->valid_GPU_matrix = PETSC_CUSP_UNALLOCATED; 794 } catch(char *ex) { 795 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 796 } 797 } else { 798 /* The triangular factors */ 799 try { 800 Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; 801 GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; 802 GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; 803 delete (GPU_Matrix_Ifc*) cusparseMatLo; 804 delete (GPU_Matrix_Ifc*) cusparseMatUp; 805 delete (CUSPARRAY*) cusparseTriFactors->tempvec; 806 delete cusparseTriFactors; 807 } catch(char *ex) { 808 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); 809 } 810 } 811 if (MAT_cusparseHandle) { 812 cusparseStatus_t stat; 813 stat = cusparseDestroy(MAT_cusparseHandle);CHKERRCUSP(stat); 814 815 MAT_cusparseHandle=0; 816 } 817 /*this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 818 A->spptr = 0; 819 820 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 821 PetscFunctionReturn(0); 822 } 823 824 EXTERN_C_BEGIN 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatCreate_SeqAIJCUSPARSE" 827 PetscErrorCode MatCreate_SeqAIJCUSPARSE(Mat B) 828 { 829 PetscErrorCode ierr; 830 831 PetscFunctionBegin; 832 ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 833 if (B->factortype==MAT_FACTOR_NONE) { 834 /* you cannot check the inode.use flag here since the matrix was just created. 835 now build a GPU matrix data structure */ 836 B->spptr = new Mat_SeqAIJCUSPARSE; 837 838 ((Mat_SeqAIJCUSPARSE*)B->spptr)->mat = 0; 839 ((Mat_SeqAIJCUSPARSE*)B->spptr)->tempvec = 0; 840 ((Mat_SeqAIJCUSPARSE*)B->spptr)->format = MAT_CUSPARSE_CSR; 841 } else { 842 /* NEXT, set the pointers to the triangular factors */ 843 B->spptr = new Mat_SeqAIJCUSPARSETriFactors; 844 845 ((Mat_SeqAIJCUSPARSETriFactors*)B->spptr)->loTriFactorPtr = 0; 846 ((Mat_SeqAIJCUSPARSETriFactors*)B->spptr)->upTriFactorPtr = 0; 847 ((Mat_SeqAIJCUSPARSETriFactors*)B->spptr)->tempvec = 0; 848 ((Mat_SeqAIJCUSPARSETriFactors*)B->spptr)->format = cusparseMatSolveStorageFormat; 849 } 850 /* Create a single instance of the MAT_cusparseHandle for any matrix (matMult, TriSolve, ...) */ 851 if (!MAT_cusparseHandle) { 852 cusparseStatus_t stat; 853 stat = cusparseCreate(&MAT_cusparseHandle);CHKERRCUSP(stat); 854 } 855 /* Here we overload MatGetFactor_petsc_C which enables -mat_type aijcusparse to use the 856 default cusparse tri solve. Note the difference with the implementation in 857 MatCreate_SeqAIJCUSP in ../seqcusp/aijcusp.cu */ 858 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqaij_cusparse",MatGetFactor_seqaij_cusparse);CHKERRQ(ierr); 859 860 B->ops->assemblyend = MatAssemblyEnd_SeqAIJCUSPARSE; 861 B->ops->destroy = MatDestroy_SeqAIJCUSPARSE; 862 B->ops->getvecs = MatGetVecs_SeqAIJCUSPARSE; 863 B->ops->setfromoptions = MatSetFromOptions_SeqAIJCUSPARSE; 864 B->ops->mult = MatMult_SeqAIJCUSPARSE; 865 B->ops->multadd = MatMultAdd_SeqAIJCUSPARSE; 866 B->ops->multtranspose = MatMultTranspose_SeqAIJCUSPARSE; 867 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJCUSPARSE; 868 869 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCUSPARSE);CHKERRQ(ierr); 870 871 B->valid_GPU_matrix = PETSC_CUSP_UNALLOCATED; 872 873 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B, "MatCUSPARSESetFormat_C", "MatCUSPARSESetFormat_SeqAIJCUSPARSE", MatCUSPARSESetFormat_SeqAIJCUSPARSE);CHKERRQ(ierr); 874 PetscFunctionReturn(0); 875 } 876 EXTERN_C_END 877 878 /*M 879 MATSEQAIJCUSPARSE - MATAIJCUSPARSE = "(seq)aijcusparse" - A matrix type to be used for sparse matrices. 880 881 A matrix type type whose data resides on Nvidia GPUs. These matrices can be in either 882 CSR, ELL, or Hybrid format. All matrix calculations are performed on Nvidia GPUs using 883 the CUSPARSE library. This type is only available when using the 'txpetscgpu' package. 884 Use --download-txpetscgpu to build/install PETSc to use different CUSPARSE library and 885 the different GPU storage formats. 886 887 Options Database Keys: 888 + -mat_type aijcusparse - sets the matrix type to "seqaijcusparse" during a call to MatSetFromOptions() 889 . -mat_cusparse_storage_format csr - sets the storage format of matrices (for MatMult and factors in MatSolve) during a call to MatSetFromOptions(). Other options include ell (ellpack) or hyb (hybrid). Only available with 'txpetscgpu' package. 890 . -mat_cusparse_mult_storage_format csr - sets the storage format of matrices (for MatMult) during a call to MatSetFromOptions(). Other options include ell (ellpack) or hyb (hybrid). Only available with 'txpetscgpu' package. 891 - -mat_cusparse_solve_storage_format csr - sets the storage format matrices (for factors in MatSolve) during a call to MatSetFromOptions(). Other options include ell (ellpack) or hyb (hybrid). Only available with 'txpetscgpu' package. 892 893 Level: beginner 894 895 .seealso: MatCreateSeqAIJCUSPARSE(), MATAIJCUSPARSE, MatCreateAIJCUSPARSE(), MatCUSPARSESetFormat(), MatCUSPARSEStorageFormat, MatCUSPARSEFormatOperation 896 M*/ 897