xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 09f3b4e5628a00a1eaf17d80982cfbcc515cc9c1)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7 
8 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
13 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
14 {
15   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
16   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
17   PetscErrorCode ierr;
18   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
19   PetscInt       bs = mat->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
20   IS             from,to;
21   Vec            gvec;
22   PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
23   PetscInt       *owners=sbaij->rowners,*sowners,*ec_owner,k;
24   PetscMap       vecmap;
25   PetscScalar    *ptr;
26 
27   PetscFunctionBegin;
28   if (sbaij->sMvctx) {
29     /* This two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */
30     ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
31     sbaij->sMvctx = 0;
32   }
33 
34   /* For the first stab we make an array as long as the number of columns */
35   /* mark those columns that are in sbaij->B */
36   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
37   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
38   for (i=0; i<mbs; i++) {
39     for (j=0; j<B->ilen[i]; j++) {
40       if (!indices[aj[B->i[i] + j]]) ec++;
41       indices[aj[B->i[i] + j] ] = 1;
42     }
43   }
44 
45   /* form arrays of columns we need */
46   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
47   ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr);
48   ec_owner = sgarray + 2*ec;
49 
50   ec = 0;
51   for (j=0; j<size; j++){
52     for (i=owners[j]; i<owners[j+1]; i++){
53       if (indices[i]) {
54         garray[ec]   = i;
55         ec_owner[ec] = j;
56         ec++;
57       }
58     }
59   }
60 
61   /* make indices now point into garray */
62   for (i=0; i<ec; i++) indices[garray[i]] = i;
63 
64   /* compact out the extra columns in B */
65   for (i=0; i<mbs; i++) {
66     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
67   }
68   B->nbs      = ec;
69   sbaij->B->n = ec*mat->bs;
70   ierr = PetscFree(indices);CHKERRQ(ierr);
71 
72   /* create local vector that is used to scatter into */
73   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
74 
75   /* create two temporary index sets for building scatter-gather */
76   ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
77   for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
78   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);
79 
80   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
81   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
82 
83   /* generate the scatter context
84      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
85   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
86   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
87   ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);
88 
89   sbaij->garray = garray;
90   ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
91   ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
92   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
93   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
94 
95   ierr = ISDestroy(from);CHKERRQ(ierr);
96   ierr = ISDestroy(to);CHKERRQ(ierr);
97 
98   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
99   lsize = (mbs + ec)*bs;
100   ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
101   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
102   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
103 
104   ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
105   ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);
106 
107   /* x index in the IS sfrom */
108   for (i=0; i<ec; i++) {
109     j = ec_owner[i];
110     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
111   }
112   /* b index in the IS sfrom */
113   k = sowners[rank]/bs + mbs;
114   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
115 
116   for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
117   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);
118 
119   /* x index in the IS sto */
120   k = sowners[rank]/bs + mbs;
121   for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
122   /* b index in the IS sto */
123   for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];
124 
125   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);
126 
127   /* gnerate the SBAIJ scatter context */
128   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
129 
130    /*
131       Post the receives for the first matrix vector product. We sync-chronize after
132     this on the chance that the user immediately calls MatMult() after assemblying
133     the matrix.
134   */
135   ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);
136 
137   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
138   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
139   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
140   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
141   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
142 
143   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
144   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
145   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
146 
147   ierr = PetscFree(stmp);CHKERRQ(ierr);
148   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
149 
150   ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
151   ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
152   ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
153   ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
154   ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
155   ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
156   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
157   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
158 
159   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
160   ierr = ISDestroy(from);CHKERRQ(ierr);
161   ierr = ISDestroy(to);CHKERRQ(ierr);
162   ierr = VecDestroy(gvec);CHKERRQ(ierr);
163   ierr = PetscFree(sgarray);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
169 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
170 {
171   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
172   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
173   PetscErrorCode     ierr;
174   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
175   PetscInt           bs = mat->bs,*stmp;
176   IS                 from,to;
177   Vec                gvec;
178 #if defined (PETSC_USE_CTABLE)
179   PetscTable         gid1_lid1;
180   PetscTablePosition tpos;
181   PetscInt           gid,lid;
182 #else
183   PetscInt           Nbs = baij->Nbs,*indices;
184 #endif
185 
186   PetscFunctionBegin;
187 #if defined (PETSC_USE_CTABLE)
188   /* use a table - Mark Adams */
189   PetscTableCreate(B->mbs,&gid1_lid1);
190   for (i=0; i<B->mbs; i++) {
191     for (j=0; j<B->ilen[i]; j++) {
192       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
193       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
194       if (!data) {
195         /* one based table */
196         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
197       }
198     }
199   }
200   /* form array of columns we need */
201   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
202   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
203   while (tpos) {
204     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
205     gid--; lid--;
206     garray[lid] = gid;
207   }
208   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
209   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
210   for (i=0; i<ec; i++) {
211     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
212   }
213   /* compact out the extra columns in B */
214   for (i=0; i<B->mbs; i++) {
215     for (j=0; j<B->ilen[i]; j++) {
216       PetscInt gid1 = aj[B->i[i] + j] + 1;
217       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
218       lid --;
219       aj[B->i[i]+j] = lid;
220     }
221   }
222   B->nbs     = ec;
223   baij->B->n = ec*mat->bs;
224   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
225   /* Mark Adams */
226 #else
227   /* For the first stab we make an array as long as the number of columns */
228   /* mark those columns that are in baij->B */
229   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
230   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
231   for (i=0; i<B->mbs; i++) {
232     for (j=0; j<B->ilen[i]; j++) {
233       if (!indices[aj[B->i[i] + j]]) ec++;
234       indices[aj[B->i[i] + j] ] = 1;
235     }
236   }
237 
238   /* form array of columns we need */
239   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
240   ec = 0;
241   for (i=0; i<Nbs; i++) {
242     if (indices[i]) {
243       garray[ec++] = i;
244     }
245   }
246 
247   /* make indices now point into garray */
248   for (i=0; i<ec; i++) {
249     indices[garray[i]] = i;
250   }
251 
252   /* compact out the extra columns in B */
253   for (i=0; i<B->mbs; i++) {
254     for (j=0; j<B->ilen[i]; j++) {
255       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
256     }
257   }
258   B->nbs       = ec;
259   baij->B->n   = ec*mat->bs;
260   ierr = PetscFree(indices);CHKERRQ(ierr);
261 #endif
262 
263   /* create local vector that is used to scatter into */
264   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
265 
266   /* create two temporary index sets for building scatter-gather */
267   for (i=0; i<ec; i++) {
268     garray[i] = bs*garray[i];
269   }
270   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
271   for (i=0; i<ec; i++) {
272     garray[i] = garray[i]/bs;
273   }
274 
275   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
276   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
277   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
278   ierr = PetscFree(stmp);CHKERRQ(ierr);
279 
280   /* create temporary global vector to generate scatter context */
281   /* this is inefficient, but otherwise we must do either
282      1) save garray until the first actual scatter when the vector is known or
283      2) have another way of generating a scatter context without a vector.*/
284   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
285 
286   /* gnerate the scatter context */
287   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
288 
289   /*
290       Post the receives for the first matrix vector product. We sync-chronize after
291     this on the chance that the user immediately calls MatMult() after assemblying
292     the matrix.
293   */
294   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
295   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
296 
297   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
298   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
299   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
300   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
301   baij->garray = garray;
302   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
303   ierr = ISDestroy(from);CHKERRQ(ierr);
304   ierr = ISDestroy(to);CHKERRQ(ierr);
305   ierr = VecDestroy(gvec);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 /*
310      Takes the local part of an already assembled MPISBAIJ matrix
311    and disassembles it. This is to allow new nonzeros into the matrix
312    that require more communication in the matrix vector multiply.
313    Thus certain data-structures must be rebuilt.
314 
315    Kind of slow! But that's what application programmers get when
316    they are sloppy.
317 */
318 #undef __FUNCT__
319 #define __FUNCT__ "DisAssemble_MPISBAIJ"
320 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
321 {
322   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
323   Mat            B = baij->B,Bnew;
324   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
325   PetscErrorCode ierr;
326   PetscInt       i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
327   PetscInt       k,bs=A->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
328   MatScalar      *a = Bbaij->a;
329   PetscScalar    *atmp;
330 #if defined(PETSC_USE_MAT_SINGLE)
331   PetscInt       l;
332 #endif
333 
334   PetscFunctionBegin;
335 #if defined(PETSC_USE_MAT_SINGLE)
336   ierr = PetscMalloc(A->bs*sizeof(PetscScalar),&atmp);
337 #endif
338   /* free stuff related to matrix-vec multiply */
339   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
340   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);
341   baij->lvec = 0;
342   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);
343   baij->Mvctx = 0;
344 
345   ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
346   ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
347   baij->slvec0 = 0;
348   ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
349   ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
350   ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
351   baij->slvec1 = 0;
352 
353   if (baij->colmap) {
354 #if defined (PETSC_USE_CTABLE)
355     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
356 #else
357     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
358     baij->colmap = 0;
359     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
360 #endif
361   }
362 
363   /* make sure that B is assembled so we can access its values */
364   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
365   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
366 
367   /* invent new B and copy stuff over */
368   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
369   for (i=0; i<mbs; i++) {
370     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
371   }
372   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
373   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
374   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
375   ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr);
376   ierr = PetscFree(nz);CHKERRQ(ierr);
377 
378   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
379   for (i=0; i<mbs; i++) {
380     rvals[0] = bs*i;
381     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
382     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
383       col = garray[Bbaij->j[j]]*bs;
384       for (k=0; k<bs; k++) {
385 #if defined(PETSC_USE_MAT_SINGLE)
386         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
387 #else
388         atmp = a+j*bs2 + k*bs;
389 #endif
390         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
391         col++;
392       }
393     }
394   }
395 #if defined(PETSC_USE_MAT_SINGLE)
396   ierr = PetscFree(atmp);CHKERRQ(ierr);
397 #endif
398   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
399   baij->garray = 0;
400   ierr = PetscFree(rvals);CHKERRQ(ierr);
401   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
402   ierr = MatDestroy(B);CHKERRQ(ierr);
403   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
404   baij->B = Bnew;
405   A->was_assembled = PETSC_FALSE;
406   PetscFunctionReturn(0);
407 }
408 
409 
410 
411