19ae82921SPaul Mullowney #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2*bbf3fe20SPaul Mullowney #include "mpicusparsematimpl.h" 39ae82921SPaul Mullowney 49ae82921SPaul Mullowney EXTERN_C_BEGIN 59ae82921SPaul Mullowney #undef __FUNCT__ 69ae82921SPaul Mullowney #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE" 79ae82921SPaul Mullowney PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 89ae82921SPaul Mullowney { 9*bbf3fe20SPaul Mullowney Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 10*bbf3fe20SPaul Mullowney Mat_MPIAIJCUSPARSE * cusparseStruct = (Mat_MPIAIJCUSPARSE*)b->spptr; 119ae82921SPaul Mullowney PetscErrorCode ierr; 129ae82921SPaul Mullowney PetscInt i; 139ae82921SPaul Mullowney 149ae82921SPaul Mullowney PetscFunctionBegin; 159ae82921SPaul Mullowney if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 169ae82921SPaul Mullowney if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 179ae82921SPaul Mullowney if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 189ae82921SPaul Mullowney if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 199ae82921SPaul Mullowney 209ae82921SPaul Mullowney ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 219ae82921SPaul Mullowney ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 229ae82921SPaul Mullowney ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 239ae82921SPaul Mullowney ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 249ae82921SPaul Mullowney if (d_nnz) { 259ae82921SPaul Mullowney for (i=0; i<B->rmap->n; i++) { 269ae82921SPaul Mullowney if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 279ae82921SPaul Mullowney } 289ae82921SPaul Mullowney } 299ae82921SPaul Mullowney if (o_nnz) { 309ae82921SPaul Mullowney for (i=0; i<B->rmap->n; i++) { 319ae82921SPaul Mullowney if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 329ae82921SPaul Mullowney } 339ae82921SPaul Mullowney } 349ae82921SPaul Mullowney if (!B->preallocated) { 35*bbf3fe20SPaul Mullowney /* Explicitly create 2 MATSEQAIJCUSPARSE matrices. */ 369ae82921SPaul Mullowney ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 379ae82921SPaul Mullowney ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 389ae82921SPaul Mullowney ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr); 399ae82921SPaul Mullowney ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 409ae82921SPaul Mullowney ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 419ae82921SPaul Mullowney ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 429ae82921SPaul Mullowney ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr); 439ae82921SPaul Mullowney ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 449ae82921SPaul Mullowney } 459ae82921SPaul Mullowney ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 469ae82921SPaul Mullowney ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 47*bbf3fe20SPaul Mullowney ierr=MatSetOption_SeqAIJCUSPARSE(b->A,cusparseStruct->diagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr); 48*bbf3fe20SPaul Mullowney ierr=MatSetOption_SeqAIJCUSPARSE(b->B,cusparseStruct->offdiagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr); 499ae82921SPaul Mullowney B->preallocated = PETSC_TRUE; 509ae82921SPaul Mullowney PetscFunctionReturn(0); 519ae82921SPaul Mullowney } 529ae82921SPaul Mullowney EXTERN_C_END 539ae82921SPaul Mullowney #undef __FUNCT__ 549ae82921SPaul Mullowney #define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE" 559ae82921SPaul Mullowney PetscErrorCode MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left) 569ae82921SPaul Mullowney { 579ae82921SPaul Mullowney PetscErrorCode ierr; 589ae82921SPaul Mullowney 599ae82921SPaul Mullowney PetscFunctionBegin; 609ae82921SPaul Mullowney if (right) { 619ae82921SPaul Mullowney ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr); 629ae82921SPaul Mullowney ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 639ae82921SPaul Mullowney ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr); 649ae82921SPaul Mullowney ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr); 659ae82921SPaul Mullowney ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr); 669ae82921SPaul Mullowney } 679ae82921SPaul Mullowney if (left) { 689ae82921SPaul Mullowney ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr); 699ae82921SPaul Mullowney ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 709ae82921SPaul Mullowney ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr); 719ae82921SPaul Mullowney ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr); 729ae82921SPaul Mullowney ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr); 739ae82921SPaul Mullowney } 749ae82921SPaul Mullowney PetscFunctionReturn(0); 759ae82921SPaul Mullowney } 769ae82921SPaul Mullowney 779ae82921SPaul Mullowney 789ae82921SPaul Mullowney #undef __FUNCT__ 799ae82921SPaul Mullowney #define __FUNCT__ "MatMult_MPIAIJCUSPARSE" 809ae82921SPaul Mullowney PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy) 819ae82921SPaul Mullowney { 829ae82921SPaul Mullowney // This multiplication sequence is different sequence 839ae82921SPaul Mullowney // than the CPU version. In particular, the diagonal block 849ae82921SPaul Mullowney // multiplication kernel is launched in one stream. Then, 859ae82921SPaul Mullowney // in a separate stream, the data transfers from DeviceToHost 869ae82921SPaul Mullowney // (with MPI messaging in between), then HostToDevice are 879ae82921SPaul Mullowney // launched. Once the data transfer stream is synchronized, 889ae82921SPaul Mullowney // to ensure messaging is complete, the MatMultAdd kernel 899ae82921SPaul Mullowney // is launched in the original (MatMult) stream to protect 909ae82921SPaul Mullowney // against race conditions. 919ae82921SPaul Mullowney // 929ae82921SPaul Mullowney // This sequence should only be called for GPU computation. 939ae82921SPaul Mullowney Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 949ae82921SPaul Mullowney PetscErrorCode ierr; 959ae82921SPaul Mullowney PetscInt nt; 969ae82921SPaul Mullowney 979ae82921SPaul Mullowney PetscFunctionBegin; 989ae82921SPaul Mullowney ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 999ae82921SPaul Mullowney if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1009ae82921SPaul Mullowney ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr); 1019ae82921SPaul Mullowney ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1029ae82921SPaul Mullowney ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1039ae82921SPaul Mullowney ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1049ae82921SPaul Mullowney ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1059ae82921SPaul Mullowney ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr); 1069ae82921SPaul Mullowney PetscFunctionReturn(0); 1079ae82921SPaul Mullowney } 108ca45077fSPaul Mullowney 109ca45077fSPaul Mullowney #undef __FUNCT__ 110ca45077fSPaul Mullowney #define __FUNCT__ "MatMult_MPIAIJCUSPARSE" 111ca45077fSPaul Mullowney PetscErrorCode MatMultTranspose_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy) 112ca45077fSPaul Mullowney { 113ca45077fSPaul Mullowney // This multiplication sequence is different sequence 114ca45077fSPaul Mullowney // than the CPU version. In particular, the diagonal block 115ca45077fSPaul Mullowney // multiplication kernel is launched in one stream. Then, 116ca45077fSPaul Mullowney // in a separate stream, the data transfers from DeviceToHost 117ca45077fSPaul Mullowney // (with MPI messaging in between), then HostToDevice are 118ca45077fSPaul Mullowney // launched. Once the data transfer stream is synchronized, 119ca45077fSPaul Mullowney // to ensure messaging is complete, the MatMultAdd kernel 120ca45077fSPaul Mullowney // is launched in the original (MatMult) stream to protect 121ca45077fSPaul Mullowney // against race conditions. 122ca45077fSPaul Mullowney // 123ca45077fSPaul Mullowney // This sequence should only be called for GPU computation. 124ca45077fSPaul Mullowney Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 125ca45077fSPaul Mullowney PetscErrorCode ierr; 126ca45077fSPaul Mullowney PetscInt nt; 127ca45077fSPaul Mullowney 128ca45077fSPaul Mullowney PetscFunctionBegin; 129ca45077fSPaul Mullowney ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 130ca45077fSPaul Mullowney if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 131ca45077fSPaul Mullowney ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr); 132ca45077fSPaul Mullowney ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 133ca45077fSPaul Mullowney ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 134ca45077fSPaul Mullowney ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 135ca45077fSPaul Mullowney ierr = (*a->B->ops->multtransposeadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 136ca45077fSPaul Mullowney ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr); 137ca45077fSPaul Mullowney PetscFunctionReturn(0); 138ca45077fSPaul Mullowney } 1399ae82921SPaul Mullowney 1409ae82921SPaul Mullowney EXTERN_C_BEGIN 1419ae82921SPaul Mullowney PetscErrorCode MatCreate_MPIAIJ(Mat); 1429ae82921SPaul Mullowney EXTERN_C_END 1439ae82921SPaul Mullowney //PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats); 1449ae82921SPaul Mullowney 145ca45077fSPaul Mullowney 146ca45077fSPaul Mullowney #undef __FUNCT__ 147ca45077fSPaul Mullowney #define __FUNCT__ "MatSetOption_MPIAIJCUSPARSE" 148ca45077fSPaul Mullowney PetscErrorCode MatSetOption_MPIAIJCUSPARSE(Mat A,MatOption op,PetscBool flg) 149ca45077fSPaul Mullowney { 150ca45077fSPaul Mullowney Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 151*bbf3fe20SPaul Mullowney Mat_MPIAIJCUSPARSE * cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr; 152ca45077fSPaul Mullowney PetscErrorCode ierr; 153ca45077fSPaul Mullowney 154ca45077fSPaul Mullowney PetscFunctionBegin; 155ca45077fSPaul Mullowney ierr = MatSetOption_MPIAIJ(A,op,flg);CHKERRQ(ierr); 156ca45077fSPaul Mullowney switch (op) { 157*bbf3fe20SPaul Mullowney case MAT_DIAGBLOCK_CSR: 158*bbf3fe20SPaul Mullowney cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_CSR; 159ca45077fSPaul Mullowney break; 160*bbf3fe20SPaul Mullowney case MAT_OFFDIAGBLOCK_CSR: 161*bbf3fe20SPaul Mullowney cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR; 162ca45077fSPaul Mullowney break; 163*bbf3fe20SPaul Mullowney case MAT_DIAGBLOCK_DIA: 164*bbf3fe20SPaul Mullowney case MAT_OFFDIAGBLOCK_DIA: 165ca45077fSPaul Mullowney case MAT_DIA: 166ca45077fSPaul Mullowney SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported GPU matrix storage format DIA for (MPI,SEQ)AIJCUSPARSE matrix type."); 167*bbf3fe20SPaul Mullowney case MAT_DIAGBLOCK_ELL: 168*bbf3fe20SPaul Mullowney cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_ELL; 169ca45077fSPaul Mullowney break; 170*bbf3fe20SPaul Mullowney case MAT_OFFDIAGBLOCK_ELL: 171*bbf3fe20SPaul Mullowney cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_ELL; 172ca45077fSPaul Mullowney break; 173*bbf3fe20SPaul Mullowney case MAT_DIAGBLOCK_HYB: 174*bbf3fe20SPaul Mullowney cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_HYB; 175ca45077fSPaul Mullowney break; 176*bbf3fe20SPaul Mullowney case MAT_OFFDIAGBLOCK_HYB: 177*bbf3fe20SPaul Mullowney cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_HYB; 178ca45077fSPaul Mullowney break; 179ca45077fSPaul Mullowney default: 180ca45077fSPaul Mullowney break; 181ca45077fSPaul Mullowney } 182ca45077fSPaul Mullowney PetscFunctionReturn(0); 183ca45077fSPaul Mullowney } 184ca45077fSPaul Mullowney 185ca45077fSPaul Mullowney 186ca45077fSPaul Mullowney EXTERN_C_BEGIN 187ca45077fSPaul Mullowney #undef __FUNCT__ 188ca45077fSPaul Mullowney #define __FUNCT__ "MatSetFromOptions_MPIAIJCUSPARSE" 189ca45077fSPaul Mullowney PetscErrorCode MatSetFromOptions_MPIAIJCUSPARSE(Mat A) 190ca45077fSPaul Mullowney { 191ca45077fSPaul Mullowney Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 192*bbf3fe20SPaul Mullowney Mat_MPIAIJCUSPARSE * cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr; 193ca45077fSPaul Mullowney PetscErrorCode ierr; 194ca45077fSPaul Mullowney PetscInt idxDiag=0,idxOffDiag=0; 195ca45077fSPaul Mullowney char * formats[]={CSR,ELL,HYB}; 196ca45077fSPaul Mullowney MatOption diagFormat, offdiagFormat; 197ca45077fSPaul Mullowney PetscBool flg; 198ca45077fSPaul Mullowney PetscFunctionBegin; 199ca45077fSPaul Mullowney ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"When using TxPETSCGPU, MPIAIJCUSPARSE Options","Mat");CHKERRQ(ierr); 200ca45077fSPaul Mullowney if (A->factortype==MAT_FACTOR_NONE) { 201ca45077fSPaul Mullowney ierr = PetscOptionsEList("-mat_mult_cusparse_diag_storage_format", 202ca45077fSPaul Mullowney "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV", 203ca45077fSPaul Mullowney "None",formats,3,formats[0],&idxDiag,&flg);CHKERRQ(ierr); 204ca45077fSPaul Mullowney ierr = PetscOptionsEList("-mat_mult_cusparse_offdiag_storage_format", 205ca45077fSPaul Mullowney "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV", 206ca45077fSPaul Mullowney "None",formats,3,formats[0],&idxOffDiag,&flg);CHKERRQ(ierr); 207ca45077fSPaul Mullowney 208ca45077fSPaul Mullowney if (formats[idxDiag] == CSR) 209ca45077fSPaul Mullowney diagFormat=MAT_CSR; 210ca45077fSPaul Mullowney else if (formats[idxDiag] == ELL) 211ca45077fSPaul Mullowney diagFormat=MAT_ELL; 212ca45077fSPaul Mullowney else if (formats[idxDiag] == HYB) 213ca45077fSPaul Mullowney diagFormat=MAT_HYB; 214ca45077fSPaul Mullowney 215ca45077fSPaul Mullowney if (formats[idxOffDiag] == CSR) 216ca45077fSPaul Mullowney offdiagFormat=MAT_CSR; 217ca45077fSPaul Mullowney else if (formats[idxOffDiag] == ELL) 218ca45077fSPaul Mullowney offdiagFormat=MAT_ELL; 219ca45077fSPaul Mullowney else if (formats[idxOffDiag] == HYB) 220ca45077fSPaul Mullowney offdiagFormat=MAT_HYB; 221ca45077fSPaul Mullowney 222*bbf3fe20SPaul Mullowney cusparseStruct->diagGPUMatFormat = diagFormat; 223*bbf3fe20SPaul Mullowney cusparseStruct->offdiagGPUMatFormat = offdiagFormat; 224ca45077fSPaul Mullowney } 225ca45077fSPaul Mullowney ierr = PetscOptionsEnd();CHKERRQ(ierr); 226ca45077fSPaul Mullowney PetscFunctionReturn(0); 227ca45077fSPaul Mullowney } 228ca45077fSPaul Mullowney EXTERN_C_END 229ca45077fSPaul Mullowney 230*bbf3fe20SPaul Mullowney EXTERN_C_BEGIN 231*bbf3fe20SPaul Mullowney #undef __FUNCT__ 232*bbf3fe20SPaul Mullowney #define __FUNCT__ "MatDestroy_MPIAIJCUSPARSE" 233*bbf3fe20SPaul Mullowney PetscErrorCode MatDestroy_MPIAIJCUSPARSE(Mat A) 234*bbf3fe20SPaul Mullowney { 235*bbf3fe20SPaul Mullowney PetscErrorCode ierr; 236*bbf3fe20SPaul Mullowney Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 237*bbf3fe20SPaul Mullowney Mat_MPIAIJCUSPARSE * cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr; 238*bbf3fe20SPaul Mullowney 239*bbf3fe20SPaul Mullowney PetscFunctionBegin; 240*bbf3fe20SPaul Mullowney try { 241*bbf3fe20SPaul Mullowney delete cusparseStruct; 242*bbf3fe20SPaul Mullowney } catch(char* ex) { 243*bbf3fe20SPaul Mullowney SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Mat_MPIAIJCUSPARSE error: %s", ex); 244*bbf3fe20SPaul Mullowney } 245*bbf3fe20SPaul Mullowney cusparseStruct = 0; 246*bbf3fe20SPaul Mullowney ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 247*bbf3fe20SPaul Mullowney PetscFunctionReturn(0); 248*bbf3fe20SPaul Mullowney } 249*bbf3fe20SPaul Mullowney EXTERN_C_END 250ca45077fSPaul Mullowney 2519ae82921SPaul Mullowney EXTERN_C_BEGIN 2529ae82921SPaul Mullowney #undef __FUNCT__ 2539ae82921SPaul Mullowney #define __FUNCT__ "MatCreate_MPIAIJCUSPARSE" 254*bbf3fe20SPaul Mullowney PetscErrorCode MatCreate_MPIAIJCUSPARSE(Mat A) 2559ae82921SPaul Mullowney { 2569ae82921SPaul Mullowney PetscErrorCode ierr; 257*bbf3fe20SPaul Mullowney Mat_MPIAIJ *a; 258*bbf3fe20SPaul Mullowney Mat_MPIAIJCUSPARSE * cusparseStruct; 2599ae82921SPaul Mullowney 2609ae82921SPaul Mullowney PetscFunctionBegin; 261*bbf3fe20SPaul Mullowney ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr); 262*bbf3fe20SPaul Mullowney ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIAIJSetPreallocation_C", 2639ae82921SPaul Mullowney "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE", 2649ae82921SPaul Mullowney MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr); 265*bbf3fe20SPaul Mullowney a = (Mat_MPIAIJ*)A->data; 266*bbf3fe20SPaul Mullowney a->spptr = new Mat_MPIAIJCUSPARSE; 267*bbf3fe20SPaul Mullowney cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr; 268*bbf3fe20SPaul Mullowney cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_CSR; 269*bbf3fe20SPaul Mullowney cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR; 270*bbf3fe20SPaul Mullowney A->ops->getvecs = MatGetVecs_MPIAIJCUSPARSE; 271*bbf3fe20SPaul Mullowney A->ops->mult = MatMult_MPIAIJCUSPARSE; 272*bbf3fe20SPaul Mullowney A->ops->multtranspose = MatMultTranspose_MPIAIJCUSPARSE; 273*bbf3fe20SPaul Mullowney A->ops->setfromoptions = MatSetFromOptions_MPIAIJCUSPARSE; 274*bbf3fe20SPaul Mullowney A->ops->setoption = MatSetOption_MPIAIJCUSPARSE; 275*bbf3fe20SPaul Mullowney A->ops->destroy = MatDestroy_MPIAIJCUSPARSE; 276*bbf3fe20SPaul Mullowney ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJCUSPARSE);CHKERRQ(ierr); 2779ae82921SPaul Mullowney PetscFunctionReturn(0); 2789ae82921SPaul Mullowney } 2799ae82921SPaul Mullowney EXTERN_C_END 2809ae82921SPaul Mullowney 2819ae82921SPaul Mullowney 2829ae82921SPaul Mullowney #undef __FUNCT__ 2839ae82921SPaul Mullowney #define __FUNCT__ "MatCreateAIJCUSPARSE" 2849ae82921SPaul Mullowney PetscErrorCode MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2859ae82921SPaul Mullowney { 2869ae82921SPaul Mullowney PetscErrorCode ierr; 2879ae82921SPaul Mullowney PetscMPIInt size; 2889ae82921SPaul Mullowney 2899ae82921SPaul Mullowney PetscFunctionBegin; 2909ae82921SPaul Mullowney ierr = MatCreate(comm,A);CHKERRQ(ierr); 2919ae82921SPaul Mullowney ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2929ae82921SPaul Mullowney ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2939ae82921SPaul Mullowney if (size > 1) { 2949ae82921SPaul Mullowney ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr); 2959ae82921SPaul Mullowney ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2969ae82921SPaul Mullowney } else { 2979ae82921SPaul Mullowney ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr); 2989ae82921SPaul Mullowney ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2999ae82921SPaul Mullowney } 3009ae82921SPaul Mullowney PetscFunctionReturn(0); 3019ae82921SPaul Mullowney } 3029ae82921SPaul Mullowney 3039ae82921SPaul Mullowney /*MC 3049ae82921SPaul Mullowney MATAIJCUSPARSE - MATAIJCUSPARSE = "aijcusparse" - A matrix type to be used for sparse matrices. 3059ae82921SPaul Mullowney 3069ae82921SPaul Mullowney This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single process communicator, 3079ae82921SPaul Mullowney and MATMPIAIJCUSPARSE otherwise. As a result, for single process communicators, 3089ae82921SPaul Mullowney MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 3099ae82921SPaul Mullowney for communicators controlling multiple processes. It is recommended that you call both of 3109ae82921SPaul Mullowney the above preallocation routines for simplicity. 3119ae82921SPaul Mullowney 3129ae82921SPaul Mullowney Options Database Keys: 3139ae82921SPaul Mullowney . -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions() 3149ae82921SPaul Mullowney 3159ae82921SPaul Mullowney Level: beginner 3169ae82921SPaul Mullowney 3179ae82921SPaul Mullowney .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE 3189ae82921SPaul Mullowney M*/ 3199ae82921SPaul Mullowney 320