xref: /petsc/src/mat/impls/aij/mpi/mpicusparse/mpiaijcusparse.cu (revision bbf3fe20a13d25761406078d7bb366504465e38d)
19ae82921SPaul Mullowney #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2*bbf3fe20SPaul Mullowney #include "mpicusparsematimpl.h"
39ae82921SPaul Mullowney 
49ae82921SPaul Mullowney EXTERN_C_BEGIN
59ae82921SPaul Mullowney #undef __FUNCT__
69ae82921SPaul Mullowney #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE"
79ae82921SPaul Mullowney PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
89ae82921SPaul Mullowney {
9*bbf3fe20SPaul Mullowney   Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;
10*bbf3fe20SPaul Mullowney   Mat_MPIAIJCUSPARSE * cusparseStruct = (Mat_MPIAIJCUSPARSE*)b->spptr;
119ae82921SPaul Mullowney   PetscErrorCode ierr;
129ae82921SPaul Mullowney   PetscInt       i;
139ae82921SPaul Mullowney 
149ae82921SPaul Mullowney   PetscFunctionBegin;
159ae82921SPaul Mullowney   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
169ae82921SPaul Mullowney   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
179ae82921SPaul Mullowney   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
189ae82921SPaul Mullowney   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
199ae82921SPaul Mullowney 
209ae82921SPaul Mullowney   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
219ae82921SPaul Mullowney   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
229ae82921SPaul Mullowney   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
239ae82921SPaul Mullowney   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
249ae82921SPaul Mullowney   if (d_nnz) {
259ae82921SPaul Mullowney     for (i=0; i<B->rmap->n; i++) {
269ae82921SPaul Mullowney       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
279ae82921SPaul Mullowney     }
289ae82921SPaul Mullowney   }
299ae82921SPaul Mullowney   if (o_nnz) {
309ae82921SPaul Mullowney     for (i=0; i<B->rmap->n; i++) {
319ae82921SPaul Mullowney       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
329ae82921SPaul Mullowney     }
339ae82921SPaul Mullowney   }
349ae82921SPaul Mullowney   if (!B->preallocated) {
35*bbf3fe20SPaul Mullowney     /* Explicitly create 2 MATSEQAIJCUSPARSE matrices. */
369ae82921SPaul Mullowney     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
379ae82921SPaul Mullowney     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
389ae82921SPaul Mullowney     ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
399ae82921SPaul Mullowney     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
409ae82921SPaul Mullowney     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
419ae82921SPaul Mullowney     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
429ae82921SPaul Mullowney     ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
439ae82921SPaul Mullowney     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
449ae82921SPaul Mullowney   }
459ae82921SPaul Mullowney   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
469ae82921SPaul Mullowney   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
47*bbf3fe20SPaul Mullowney   ierr=MatSetOption_SeqAIJCUSPARSE(b->A,cusparseStruct->diagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
48*bbf3fe20SPaul Mullowney   ierr=MatSetOption_SeqAIJCUSPARSE(b->B,cusparseStruct->offdiagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
499ae82921SPaul Mullowney   B->preallocated = PETSC_TRUE;
509ae82921SPaul Mullowney   PetscFunctionReturn(0);
519ae82921SPaul Mullowney }
529ae82921SPaul Mullowney EXTERN_C_END
539ae82921SPaul Mullowney #undef __FUNCT__
549ae82921SPaul Mullowney #define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE"
559ae82921SPaul Mullowney PetscErrorCode  MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left)
569ae82921SPaul Mullowney {
579ae82921SPaul Mullowney   PetscErrorCode ierr;
589ae82921SPaul Mullowney 
599ae82921SPaul Mullowney   PetscFunctionBegin;
609ae82921SPaul Mullowney   if (right) {
619ae82921SPaul Mullowney     ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
629ae82921SPaul Mullowney     ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
639ae82921SPaul Mullowney     ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
649ae82921SPaul Mullowney     ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr);
659ae82921SPaul Mullowney     ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
669ae82921SPaul Mullowney   }
679ae82921SPaul Mullowney   if (left) {
689ae82921SPaul Mullowney     ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
699ae82921SPaul Mullowney     ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
709ae82921SPaul Mullowney     ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
719ae82921SPaul Mullowney     ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr);
729ae82921SPaul Mullowney     ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
739ae82921SPaul Mullowney   }
749ae82921SPaul Mullowney   PetscFunctionReturn(0);
759ae82921SPaul Mullowney }
769ae82921SPaul Mullowney 
779ae82921SPaul Mullowney 
789ae82921SPaul Mullowney #undef __FUNCT__
799ae82921SPaul Mullowney #define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
809ae82921SPaul Mullowney PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
819ae82921SPaul Mullowney {
829ae82921SPaul Mullowney   // This multiplication sequence is different sequence
839ae82921SPaul Mullowney   // than the CPU version. In particular, the diagonal block
849ae82921SPaul Mullowney   // multiplication kernel is launched in one stream. Then,
859ae82921SPaul Mullowney   // in a separate stream, the data transfers from DeviceToHost
869ae82921SPaul Mullowney   // (with MPI messaging in between), then HostToDevice are
879ae82921SPaul Mullowney   // launched. Once the data transfer stream is synchronized,
889ae82921SPaul Mullowney   // to ensure messaging is complete, the MatMultAdd kernel
899ae82921SPaul Mullowney   // is launched in the original (MatMult) stream to protect
909ae82921SPaul Mullowney   // against race conditions.
919ae82921SPaul Mullowney   //
929ae82921SPaul Mullowney   // This sequence should only be called for GPU computation.
939ae82921SPaul Mullowney   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
949ae82921SPaul Mullowney   PetscErrorCode ierr;
959ae82921SPaul Mullowney   PetscInt       nt;
969ae82921SPaul Mullowney 
979ae82921SPaul Mullowney   PetscFunctionBegin;
989ae82921SPaul Mullowney   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
999ae82921SPaul Mullowney   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1009ae82921SPaul Mullowney   ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
1019ae82921SPaul Mullowney   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1029ae82921SPaul Mullowney   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1039ae82921SPaul Mullowney   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1049ae82921SPaul Mullowney   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1059ae82921SPaul Mullowney   ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
1069ae82921SPaul Mullowney   PetscFunctionReturn(0);
1079ae82921SPaul Mullowney }
108ca45077fSPaul Mullowney 
109ca45077fSPaul Mullowney #undef __FUNCT__
110ca45077fSPaul Mullowney #define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
111ca45077fSPaul Mullowney PetscErrorCode MatMultTranspose_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
112ca45077fSPaul Mullowney {
113ca45077fSPaul Mullowney   // This multiplication sequence is different sequence
114ca45077fSPaul Mullowney   // than the CPU version. In particular, the diagonal block
115ca45077fSPaul Mullowney   // multiplication kernel is launched in one stream. Then,
116ca45077fSPaul Mullowney   // in a separate stream, the data transfers from DeviceToHost
117ca45077fSPaul Mullowney   // (with MPI messaging in between), then HostToDevice are
118ca45077fSPaul Mullowney   // launched. Once the data transfer stream is synchronized,
119ca45077fSPaul Mullowney   // to ensure messaging is complete, the MatMultAdd kernel
120ca45077fSPaul Mullowney   // is launched in the original (MatMult) stream to protect
121ca45077fSPaul Mullowney   // against race conditions.
122ca45077fSPaul Mullowney   //
123ca45077fSPaul Mullowney   // This sequence should only be called for GPU computation.
124ca45077fSPaul Mullowney   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
125ca45077fSPaul Mullowney   PetscErrorCode ierr;
126ca45077fSPaul Mullowney   PetscInt       nt;
127ca45077fSPaul Mullowney 
128ca45077fSPaul Mullowney   PetscFunctionBegin;
129ca45077fSPaul Mullowney   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
130ca45077fSPaul Mullowney   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
131ca45077fSPaul Mullowney   ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
132ca45077fSPaul Mullowney   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
133ca45077fSPaul Mullowney   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
134ca45077fSPaul Mullowney   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
135ca45077fSPaul Mullowney   ierr = (*a->B->ops->multtransposeadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
136ca45077fSPaul Mullowney   ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
137ca45077fSPaul Mullowney   PetscFunctionReturn(0);
138ca45077fSPaul Mullowney }
1399ae82921SPaul Mullowney 
1409ae82921SPaul Mullowney EXTERN_C_BEGIN
1419ae82921SPaul Mullowney PetscErrorCode  MatCreate_MPIAIJ(Mat);
1429ae82921SPaul Mullowney EXTERN_C_END
1439ae82921SPaul Mullowney //PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);
1449ae82921SPaul Mullowney 
145ca45077fSPaul Mullowney 
146ca45077fSPaul Mullowney #undef __FUNCT__
147ca45077fSPaul Mullowney #define __FUNCT__ "MatSetOption_MPIAIJCUSPARSE"
148ca45077fSPaul Mullowney PetscErrorCode MatSetOption_MPIAIJCUSPARSE(Mat A,MatOption op,PetscBool flg)
149ca45077fSPaul Mullowney {
150ca45077fSPaul Mullowney   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
151*bbf3fe20SPaul Mullowney   Mat_MPIAIJCUSPARSE * cusparseStruct  = (Mat_MPIAIJCUSPARSE*)a->spptr;
152ca45077fSPaul Mullowney   PetscErrorCode ierr;
153ca45077fSPaul Mullowney 
154ca45077fSPaul Mullowney   PetscFunctionBegin;
155ca45077fSPaul Mullowney   ierr = MatSetOption_MPIAIJ(A,op,flg);CHKERRQ(ierr);
156ca45077fSPaul Mullowney   switch (op) {
157*bbf3fe20SPaul Mullowney   case MAT_DIAGBLOCK_CSR:
158*bbf3fe20SPaul Mullowney     cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_CSR;
159ca45077fSPaul Mullowney     break;
160*bbf3fe20SPaul Mullowney   case MAT_OFFDIAGBLOCK_CSR:
161*bbf3fe20SPaul Mullowney     cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR;
162ca45077fSPaul Mullowney     break;
163*bbf3fe20SPaul Mullowney   case MAT_DIAGBLOCK_DIA:
164*bbf3fe20SPaul Mullowney   case MAT_OFFDIAGBLOCK_DIA:
165ca45077fSPaul Mullowney   case MAT_DIA:
166ca45077fSPaul Mullowney     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported GPU matrix storage format DIA for (MPI,SEQ)AIJCUSPARSE matrix type.");
167*bbf3fe20SPaul Mullowney   case MAT_DIAGBLOCK_ELL:
168*bbf3fe20SPaul Mullowney     cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_ELL;
169ca45077fSPaul Mullowney     break;
170*bbf3fe20SPaul Mullowney   case MAT_OFFDIAGBLOCK_ELL:
171*bbf3fe20SPaul Mullowney     cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_ELL;
172ca45077fSPaul Mullowney     break;
173*bbf3fe20SPaul Mullowney   case MAT_DIAGBLOCK_HYB:
174*bbf3fe20SPaul Mullowney     cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_HYB;
175ca45077fSPaul Mullowney     break;
176*bbf3fe20SPaul Mullowney   case MAT_OFFDIAGBLOCK_HYB:
177*bbf3fe20SPaul Mullowney     cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_HYB;
178ca45077fSPaul Mullowney     break;
179ca45077fSPaul Mullowney   default:
180ca45077fSPaul Mullowney     break;
181ca45077fSPaul Mullowney   }
182ca45077fSPaul Mullowney   PetscFunctionReturn(0);
183ca45077fSPaul Mullowney }
184ca45077fSPaul Mullowney 
185ca45077fSPaul Mullowney 
186ca45077fSPaul Mullowney EXTERN_C_BEGIN
187ca45077fSPaul Mullowney #undef __FUNCT__
188ca45077fSPaul Mullowney #define __FUNCT__ "MatSetFromOptions_MPIAIJCUSPARSE"
189ca45077fSPaul Mullowney PetscErrorCode MatSetFromOptions_MPIAIJCUSPARSE(Mat A)
190ca45077fSPaul Mullowney {
191ca45077fSPaul Mullowney   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
192*bbf3fe20SPaul Mullowney   Mat_MPIAIJCUSPARSE * cusparseStruct  = (Mat_MPIAIJCUSPARSE*)a->spptr;
193ca45077fSPaul Mullowney   PetscErrorCode     ierr;
194ca45077fSPaul Mullowney   PetscInt       idxDiag=0,idxOffDiag=0;
195ca45077fSPaul Mullowney   char * formats[]={CSR,ELL,HYB};
196ca45077fSPaul Mullowney   MatOption diagFormat, offdiagFormat;
197ca45077fSPaul Mullowney   PetscBool      flg;
198ca45077fSPaul Mullowney   PetscFunctionBegin;
199ca45077fSPaul Mullowney   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"When using TxPETSCGPU, MPIAIJCUSPARSE Options","Mat");CHKERRQ(ierr);
200ca45077fSPaul Mullowney   if (A->factortype==MAT_FACTOR_NONE) {
201ca45077fSPaul Mullowney     ierr = PetscOptionsEList("-mat_mult_cusparse_diag_storage_format",
202ca45077fSPaul Mullowney 			     "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV",
203ca45077fSPaul Mullowney 			     "None",formats,3,formats[0],&idxDiag,&flg);CHKERRQ(ierr);
204ca45077fSPaul Mullowney     ierr = PetscOptionsEList("-mat_mult_cusparse_offdiag_storage_format",
205ca45077fSPaul Mullowney 			     "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV",
206ca45077fSPaul Mullowney 			     "None",formats,3,formats[0],&idxOffDiag,&flg);CHKERRQ(ierr);
207ca45077fSPaul Mullowney 
208ca45077fSPaul Mullowney     if (formats[idxDiag] == CSR)
209ca45077fSPaul Mullowney       diagFormat=MAT_CSR;
210ca45077fSPaul Mullowney     else if (formats[idxDiag] == ELL)
211ca45077fSPaul Mullowney       diagFormat=MAT_ELL;
212ca45077fSPaul Mullowney     else if (formats[idxDiag] == HYB)
213ca45077fSPaul Mullowney       diagFormat=MAT_HYB;
214ca45077fSPaul Mullowney 
215ca45077fSPaul Mullowney     if (formats[idxOffDiag] == CSR)
216ca45077fSPaul Mullowney       offdiagFormat=MAT_CSR;
217ca45077fSPaul Mullowney     else if (formats[idxOffDiag] == ELL)
218ca45077fSPaul Mullowney       offdiagFormat=MAT_ELL;
219ca45077fSPaul Mullowney     else if (formats[idxOffDiag] == HYB)
220ca45077fSPaul Mullowney       offdiagFormat=MAT_HYB;
221ca45077fSPaul Mullowney 
222*bbf3fe20SPaul Mullowney     cusparseStruct->diagGPUMatFormat = diagFormat;
223*bbf3fe20SPaul Mullowney     cusparseStruct->offdiagGPUMatFormat = offdiagFormat;
224ca45077fSPaul Mullowney   }
225ca45077fSPaul Mullowney   ierr = PetscOptionsEnd();CHKERRQ(ierr);
226ca45077fSPaul Mullowney   PetscFunctionReturn(0);
227ca45077fSPaul Mullowney }
228ca45077fSPaul Mullowney EXTERN_C_END
229ca45077fSPaul Mullowney 
230*bbf3fe20SPaul Mullowney EXTERN_C_BEGIN
231*bbf3fe20SPaul Mullowney #undef __FUNCT__
232*bbf3fe20SPaul Mullowney #define __FUNCT__ "MatDestroy_MPIAIJCUSPARSE"
233*bbf3fe20SPaul Mullowney PetscErrorCode MatDestroy_MPIAIJCUSPARSE(Mat A)
234*bbf3fe20SPaul Mullowney {
235*bbf3fe20SPaul Mullowney   PetscErrorCode ierr;
236*bbf3fe20SPaul Mullowney   Mat_MPIAIJ *a  = (Mat_MPIAIJ*)A->data;
237*bbf3fe20SPaul Mullowney   Mat_MPIAIJCUSPARSE * cusparseStruct  = (Mat_MPIAIJCUSPARSE*)a->spptr;
238*bbf3fe20SPaul Mullowney 
239*bbf3fe20SPaul Mullowney   PetscFunctionBegin;
240*bbf3fe20SPaul Mullowney   try {
241*bbf3fe20SPaul Mullowney     delete cusparseStruct;
242*bbf3fe20SPaul Mullowney   } catch(char* ex) {
243*bbf3fe20SPaul Mullowney     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Mat_MPIAIJCUSPARSE error: %s", ex);
244*bbf3fe20SPaul Mullowney   }
245*bbf3fe20SPaul Mullowney   cusparseStruct = 0;
246*bbf3fe20SPaul Mullowney   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
247*bbf3fe20SPaul Mullowney   PetscFunctionReturn(0);
248*bbf3fe20SPaul Mullowney }
249*bbf3fe20SPaul Mullowney EXTERN_C_END
250ca45077fSPaul Mullowney 
2519ae82921SPaul Mullowney EXTERN_C_BEGIN
2529ae82921SPaul Mullowney #undef __FUNCT__
2539ae82921SPaul Mullowney #define __FUNCT__ "MatCreate_MPIAIJCUSPARSE"
254*bbf3fe20SPaul Mullowney PetscErrorCode  MatCreate_MPIAIJCUSPARSE(Mat A)
2559ae82921SPaul Mullowney {
2569ae82921SPaul Mullowney   PetscErrorCode ierr;
257*bbf3fe20SPaul Mullowney   Mat_MPIAIJ *a;
258*bbf3fe20SPaul Mullowney   Mat_MPIAIJCUSPARSE * cusparseStruct;
2599ae82921SPaul Mullowney 
2609ae82921SPaul Mullowney   PetscFunctionBegin;
261*bbf3fe20SPaul Mullowney   ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr);
262*bbf3fe20SPaul Mullowney   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIAIJSetPreallocation_C",
2639ae82921SPaul Mullowney 					   "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE",
2649ae82921SPaul Mullowney 					   MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
265*bbf3fe20SPaul Mullowney   a  = (Mat_MPIAIJ*)A->data;
266*bbf3fe20SPaul Mullowney   a->spptr                      = new Mat_MPIAIJCUSPARSE;
267*bbf3fe20SPaul Mullowney   cusparseStruct  = (Mat_MPIAIJCUSPARSE*)a->spptr;
268*bbf3fe20SPaul Mullowney   cusparseStruct->diagGPUMatFormat    = MAT_DIAGBLOCK_CSR;
269*bbf3fe20SPaul Mullowney   cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR;
270*bbf3fe20SPaul Mullowney   A->ops->getvecs        = MatGetVecs_MPIAIJCUSPARSE;
271*bbf3fe20SPaul Mullowney   A->ops->mult           = MatMult_MPIAIJCUSPARSE;
272*bbf3fe20SPaul Mullowney   A->ops->multtranspose  = MatMultTranspose_MPIAIJCUSPARSE;
273*bbf3fe20SPaul Mullowney   A->ops->setfromoptions = MatSetFromOptions_MPIAIJCUSPARSE;
274*bbf3fe20SPaul Mullowney   A->ops->setoption      = MatSetOption_MPIAIJCUSPARSE;
275*bbf3fe20SPaul Mullowney   A->ops->destroy        = MatDestroy_MPIAIJCUSPARSE;
276*bbf3fe20SPaul Mullowney   ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
2779ae82921SPaul Mullowney   PetscFunctionReturn(0);
2789ae82921SPaul Mullowney }
2799ae82921SPaul Mullowney EXTERN_C_END
2809ae82921SPaul Mullowney 
2819ae82921SPaul Mullowney 
2829ae82921SPaul Mullowney #undef __FUNCT__
2839ae82921SPaul Mullowney #define __FUNCT__ "MatCreateAIJCUSPARSE"
2849ae82921SPaul Mullowney PetscErrorCode  MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2859ae82921SPaul Mullowney {
2869ae82921SPaul Mullowney   PetscErrorCode ierr;
2879ae82921SPaul Mullowney   PetscMPIInt    size;
2889ae82921SPaul Mullowney 
2899ae82921SPaul Mullowney   PetscFunctionBegin;
2909ae82921SPaul Mullowney   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2919ae82921SPaul Mullowney   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2929ae82921SPaul Mullowney   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2939ae82921SPaul Mullowney   if (size > 1) {
2949ae82921SPaul Mullowney     ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
2959ae82921SPaul Mullowney     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2969ae82921SPaul Mullowney   } else {
2979ae82921SPaul Mullowney     ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
2989ae82921SPaul Mullowney     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2999ae82921SPaul Mullowney   }
3009ae82921SPaul Mullowney   PetscFunctionReturn(0);
3019ae82921SPaul Mullowney }
3029ae82921SPaul Mullowney 
3039ae82921SPaul Mullowney /*MC
3049ae82921SPaul Mullowney    MATAIJCUSPARSE - MATAIJCUSPARSE = "aijcusparse" - A matrix type to be used for sparse matrices.
3059ae82921SPaul Mullowney 
   This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single-process
   communicator, and to MATMPIAIJCUSPARSE otherwise.  As a result, MatSeqAIJSetPreallocation()
   is supported for single-process communicators, and MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  For simplicity, it is recommended that
   you call both of the above preallocation routines.
3119ae82921SPaul Mullowney 
3129ae82921SPaul Mullowney    Options Database Keys:
3139ae82921SPaul Mullowney . -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions()
3149ae82921SPaul Mullowney 
3159ae82921SPaul Mullowney   Level: beginner
3169ae82921SPaul Mullowney 
3179ae82921SPaul Mullowney .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE
3189ae82921SPaul Mullowney M*/
3199ae82921SPaul Mullowney 
320