/* xref: /petsc/src/mat/impls/aij/mpi/mpicusparse/mpiaijcusparse.cu (revision 9ae82921df069a58776bfe4da82b38e8ff7dd41c) */
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE"
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create the two MATSEQAIJCUSPARSE blocks: b->A holds the local (diagonal) part, b->B the off-diagonal part. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
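
/*
   Illustrative sketch only (the helper name ExamplePreallocateAIJCUSPARSE and the constant
   per-row estimates are hypothetical, not part of PETSc): one way a caller might supply the
   d_nnz/o_nnz arrays consumed by the preallocation routine above.  The matrix A is assumed to
   have already been created with type MATMPIAIJCUSPARSE and its sizes set.
*/
static PetscErrorCode ExamplePreallocateAIJCUSPARSE(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend,nlocal,*dnnz,*onnz;

  PetscFunctionBegin;
  ierr   = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  nlocal = rend - rstart;
  ierr   = PetscMalloc(nlocal*sizeof(PetscInt),&dnnz);CHKERRQ(ierr);
  ierr   = PetscMalloc(nlocal*sizeof(PetscInt),&onnz);CHKERRQ(ierr);
  for (i=0; i<nlocal; i++) {
    dnnz[i] = 3;  /* estimated nonzeros per row in the diagonal block (b->A above) */
    onnz[i] = 2;  /* estimated nonzeros per row in the off-diagonal block (b->B above) */
  }
  ierr = MatMPIAIJSetPreallocation(A,0,dnnz,0,onnz);CHKERRQ(ierr);
  ierr = PetscFree(dnnz);CHKERRQ(ierr);
  ierr = PetscFree(onnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}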
#undef __FUNCT__
#define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE"
PetscErrorCode  MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (right) {
    ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*right,mat->cmap->bs);CHKERRQ(ierr); /* the right vector follows the column layout */
    ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
  }
  if (left) {
    ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
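
/*
   Illustrative sketch only (the helper name ExampleMatVecAIJCUSPARSE is hypothetical): obtain
   work vectors whose layouts and type (VECCUSP) match the matrix via the routine above, then
   apply the matrix once.
*/
static PetscErrorCode ExampleMatVecAIJCUSPARSE(Mat A)
{
  PetscErrorCode ierr;
  Vec            x,y;  /* x lives in the column space (right), y in the row space (left) */

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}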

#ifdef PETSC_HAVE_TXPETSCGPU
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version. In
  // particular, the diagonal-block multiplication kernel is launched
  // in one stream.  Then, in a separate stream, the DeviceToHost
  // transfers (with the MPI messaging in between) and the HostToDevice
  // transfers are launched.  Once the data-transfer stream is
  // synchronized, to ensure the messaging is complete, the MatMultAdd
  // kernel is launched in the original (MatMult) stream to protect
  // against race conditions.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

EXTERN_C_BEGIN
PetscErrorCode  MatCreate_MPIAIJ(Mat);
EXTERN_C_END
//PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJCUSPARSE"
/* Type constructor for MATMPIAIJCUSPARSE: builds on MATMPIAIJ, then overrides the preallocation routine and the GPU-specific operations. */
PetscErrorCode  MatCreate_MPIAIJCUSPARSE(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIAIJ(B);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE",
                                           MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
  B->ops->getvecs        = MatGetVecs_MPIAIJCUSPARSE;
  //B->ops->setvaluesbatch = MatSetValuesBatch_MPIAIJCUSPARSE;
#ifdef PETSC_HAVE_TXPETSCGPU
  B->ops->mult           = MatMult_MPIAIJCUSPARSE;
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJCUSPARSE"
/* Convenience constructor: creates a CUSPARSE-based AIJ matrix, choosing MATMPIAIJCUSPARSE or MATSEQAIJCUSPARSE from the communicator size, and applies the given preallocation. */
PetscErrorCode  MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*MC
   MATAIJCUSPARSE - MATAIJCUSPARSE = "aijcusparse" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single process
   communicator, and to MATMPIAIJCUSPARSE otherwise.  As a result, MatSeqAIJSetPreallocation() is
   supported for single process communicators, and MatMPIAIJSetPreallocation() is supported for
   communicators controlling multiple processes.  It is recommended that you call both of the
   above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateAIJCUSPARSE(), MATSEQAIJ, MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE
M*/
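
/*
   Illustrative sketch only (the helper name ExampleCreateAIJCUSPARSE, the matrix size n, and the
   constant per-row estimates are hypothetical): create and assemble a diagonal AIJCUSPARSE matrix
   with MatCreateAIJCUSPARSE(), which picks the sequential or MPI variant from the communicator
   size as described above.  Assumes PetscInitialize() has already been called.
*/
static PetscErrorCode ExampleCreateAIJCUSPARSE(MPI_Comm comm,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend;
  PetscScalar    v = 2.0;

  PetscFunctionBegin;
  /* 5 and 2 are placeholder estimates of nonzeros per row in the diagonal and off-diagonal blocks */
  ierr = MatCreateAIJCUSPARSE(comm,PETSC_DECIDE,PETSC_DECIDE,n,n,5,PETSC_NULL,2,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValues(*A,1,&i,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}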