/* xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 7d5fd1e4d9337468ad3f05b65b7facdcd2dfd2a4) */

/*
   Support for the parallel SBAIJ matrix-vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

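/*
   Context, as a minimal sketch (not part of the library; illustrative sizes,
   error checking omitted): the scatters built below are what let MatMult()
   work for a MATMPISBAIJ matrix, which stores only the upper triangular
   blocks. A typical code path that reaches MatSetUpMultiply_MPISBAIJ()
   during assembly:

      #include <petscmat.h>
      Mat      A;
      Vec      x,y;
      PetscInt bs = 2,Mb = 8;                  // block size, global block rows
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,Mb*bs,Mb*bs);
      MatSetType(A,MATMPISBAIJ);
      MatMPISBAIJSetPreallocation(A,bs,2,NULL,1,NULL);
      // ... MatSetValuesBlocked() on the upper triangle ...
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);  // assembly finishes by building
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);    // the scatters set up below
      MatCreateVecs(A,&x,&y);
      MatMult(A,x,y);                          // uses sMvctx, slvec0, slvec1
*/
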
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs  = mat->rmap->bs,*stmp,mbs = sbaij->mbs,lsize,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank = sbaij->rank;
  PetscInt       *owners = sbaij->rangebs,*ec_owner,k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1; /* one-based gid to lid table */
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a hash table to find the distinct global block columns referenced by B */
  ierr = PetscTableCreate(mbs,Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);}
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  /* sort the global block columns and rebuild the table with the sorted local indices */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);}
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
  /* determine which process owns each needed global block column */
  ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);
  for (i=j=0; i<ec; i++) {
    while (garray[i]>=owners[j+1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
  ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);

  ec = 0;
  for (j=0; j<sbaij->size; j++) {
    for (i=owners[j]; i<owners[j+1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
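  /*
     Worked example (illustrative numbers; both branches above produce the same
     result): if this process's rows of B reference the global block columns
     {7,2,9,2}, then ec = 3, garray = {2,7,9}, and B->j is rewritten in place
     to the local indices {1,0,2,0}.
  */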
  B->nbs = ec;
  ierr = PetscLayoutDestroy(&sbaij->B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&sbaij->B->cmap);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr);
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ() */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); /* gvec is only a layout template (note the NULL array) */
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);

  sbaij->garray = garray;

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec)*bs;
  ierr  = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
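  /* layout of each process's local piece of slvec0/slvec1, (mbs + ec) blocks long
     (this matches the index sets built below):
       slvec0 = [ local x values | contributions bound for off-process rows of y ]
       slvec1 = [ local y values | gathered off-process (ghost) x values         ] */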

  ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr);

  /* x indices in the IS from: other processes' x entries (in slvec0 block numbering) that this process needs */
  for (i=0; i<ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b indices in the IS from: this process's ghost slots, holding contributions bound for off-process rows */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i<2*ec; i++,j++) sgarray[i] = k + j;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  /* x indices in the IS to: this process's ghost slots in slvec1, receiving the gathered x values */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = k + i;
  /* b indices in the IS to: the owning processes' y slots in slvec1, receiving the b contributions */
  for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

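  /* slvec1a/slvec1b (and slvec0b) are sequential views aliasing the front (owned) and
     back (ghost) portions of the storage of slvec1 (and slvec0); no values are copied */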
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

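/*
   A self-contained sketch of the blocked gather pattern used above (illustrative;
   the sizes, garray, and stmp values are assumptions, error checking omitted):
   a blocked IS of needed global block columns ("from") is scattered into
   contiguous slots of a local vector ("to"), which is exactly the garray/stmp
   pattern of MatSetUpMultiply_MPISBAIJ().

      #include <petscvec.h>
      Vec        gvec,lvec;
      IS         from,to;
      VecScatter ctx;
      PetscInt   bs = 2,ec = 3,Nb = 100;
      PetscInt   garray[3] = {1,4,7};   // global block columns this process needs
      PetscInt   stmp[3]   = {0,1,2};   // their contiguous local positions
      VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,Nb*bs,&gvec);
      VecCreateSeq(PETSC_COMM_SELF,ec*bs,&lvec);
      ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);
      ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);
      VecScatterCreate(gvec,from,lvec,to,&ctx);
      VecScatterBegin(ctx,gvec,lvec,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(ctx,gvec,lvec,INSERT_VALUES,SCATTER_FORWARD);
*/
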
/*
     Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *baij  = (Mat_MPISBAIJ*)A->data;
  Mat            B      = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
  MatScalar      *a = Bbaij->a;
  PetscScalar    *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscMalloc1(A->rmap->bs,&atmp);CHKERRQ(ierr);
#endif
  /* free stuff related to matrix-vector multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* lvec holds ec*bs entries */
  ec   = ec/bs; /* number of ghost block columns; needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);

  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);

  /* colmap (global block column -> local column in B) is stale once B is rebuilt */
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
  ierr = PetscFree(nz);CHKERRQ(ierr);

  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if nonnegative). */
    ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Bnew has the same (SeqBAIJ) type as B */
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*i;
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col = garray[Bbaij->j[j]]*bs; /* map the compacted local block column back to a global column */
      for (k=0; k<bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        for (l=0; l<bs; l++) atmp[l] = a[j*bs2 + k*bs + l]; /* copy column k of the block, widening to PetscScalar */
#else
        atmp = a+j*bs2 + k*bs;
#endif
        ierr = MatSetValues(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(atmp);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);

  baij->garray = NULL;

  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);

  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
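
/*
   The scenario that triggers MatDisAssemble_MPISBAIJ(), sketched with illustrative
   indices (assumptions; error checking omitted): inserting into an already assembled
   matrix at a location that enlarges the off-process column set forces the scatters
   and baij->B to be rebuilt during the next assembly.

      Mat         A;                        // an assembled MATMPISBAIJ matrix
      PetscInt    row = 0,col = 42;         // col owned by another process
      PetscScalar v   = 1.0;
      MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
      MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); // disassembles, then rebuilds the
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);   // matrix-vector multiply structures
*/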