/* xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965) */

/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"

EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication structures needed by
   the parallel SBAIJ matrix-vector product: the map of needed off-diagonal
   block columns (sbaij->garray), the compacted off-diagonal block B, the
   local work vector/scatter pair (lvec, Mvctx), and the SBAIJ-specific
   slvec0/slvec1 vectors with their scatter context (sMvctx).

   Collective on Mat (creates MPI vectors and scatters on mat->comm).
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ       *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  int                Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  int                bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS                 from,to;
  Vec                gvec;
  int                rank=sbaij->rank,lsize,size=sbaij->size;
  int                *owners=sbaij->rowners,*sowners,*ec_owner,k;
  PetscMap           vecmap;
  PetscScalar        *ptr;

  PetscFunctionBegin;
  /* destroy structures left over from a previous assembly so they can be rebuilt */
  if (sbaij->lvec) {
    ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
    sbaij->lvec = 0;
  }
  if (sbaij->Mvctx) {
    ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
    sbaij->Mvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B; ec counts the distinct ones */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need:
     garray[0..ec-1]   - global block-column index of each needed column (sorted)
     sgarray           - 3*ec workspace; its last ec entries alias ec_owner below */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
  ec_owner = sgarray + 2*ec;  /* ec_owner[i]: rank that owns block column garray[i] */

  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: renumber B's column indices to local 0..ec-1 */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs      = ec;
  sbaij->B->n = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather
     (stmp is sized 2*ec because it is reused below for the sfrom/sto index sets) */
  ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  PetscLogObjectParent(mat,sbaij->Mvctx);
  PetscLogObjectParent(mat,sbaij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     each process holds its mbs diagonal block rows plus the ec ghost block columns */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  /* sowners: global ownership ranges of slvec0, used to translate garray indices
     into indices of the enlarged (mbs+ec per process) vector layout */
  ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);

  /* x index in the IS sfrom */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  /* generate the SBAIJ scatter context */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

   /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are sequential views that alias the local
     array of slvec1 (resp. slvec0): the first bs*mbs entries and the remainder */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,sbaij->sMvctx);
  PetscLogObjectParent(mat,sbaij->slvec0);
  PetscLogObjectParent(mat,sbaij->slvec1);
  PetscLogObjectParent(mat,sbaij->slvec0b);
  PetscLogObjectParent(mat,sbaij->slvec1a);
  PetscLogObjectParent(mat,sbaij->slvec1b);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  ierr = PetscFree(sgarray);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
169 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
/*
   MatSetUpMultiply_MPISBAIJ_2comm - Variant of MatSetUpMultiply_MPISBAIJ that
   sets up only the classic BAIJ-style structures: the global-to-local column
   map (baij->garray), the compacted off-diagonal block B, the local work
   vector baij->lvec and the scatter context baij->Mvctx.

   Two implementations of the column-map construction are compiled:
   a hash-table version (PETSC_USE_CTABLE, memory-scalable) and a dense
   marker-array version of length Nbs.

   Collective on Mat (creates an MPI vector and a scatter on mat->comm).
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  int                i,j,*aj = B->j,ec = 0,*garray;
  int                bs = baij->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  int                gid,lid;
#else
  int                Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* map each global block column appearing in B to a new local id (table is 1-based) */
  PetscTableCreate(B->mbs,&gid1_lid1);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  /* sort garray, then rebuild the table so local ids follow the sorted order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B; ec counts the distinct ones */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need (sorted, since i runs in increasing order) */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B: renumber B's column indices to 0..ec-1 */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->n   = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather;
     garray is temporarily scaled to point indices and restored afterwards */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,baij->Mvctx);
  PetscLogObjectParent(mat,baij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  baij->garray = garray;
  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
312 
313 
314 /*
315      Takes the local part of an already assembled MPIBAIJ matrix
316    and disassembles it. This is to allow new nonzeros into the matrix
317    that require more communication in the matrix vector multiply.
318    Thus certain data-structures must be rebuilt.
319 
320    Kind of slow! But that's what application programmers get when
321    they are sloppy.
322 */
323 #undef __FUNCT__
324 #define __FUNCT__ "DisAssemble_MPISBAIJ"
325 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
326 {
327   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
328   Mat           B = baij->B,Bnew;
329   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
330   PetscErrorCode ierr;
331   int i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
332   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
333   MatScalar     *a = Bbaij->a;
334   PetscScalar   *atmp;
335 #if defined(PETSC_USE_MAT_SINGLE)
336   int           l;
337 #endif
338 
339   PetscFunctionBegin;
340 #if defined(PETSC_USE_MAT_SINGLE)
341   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
342 #endif
343   /* free stuff related to matrix-vec multiply */
344   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
345   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
346   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
347   if (baij->colmap) {
348 #if defined (PETSC_USE_CTABLE)
349     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
350 #else
351     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
352     baij->colmap = 0;
353     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
354 #endif
355   }
356 
357   /* make sure that B is assembled so we can access its values */
358   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
360 
361   /* invent new B and copy stuff over */
362   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
363   for (i=0; i<mbs; i++) {
364     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
365   }
366   ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr);
367   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
368   ierr = MatSeqBAIJSetPreallocation(Bnew,baij->bs,0,nz);CHKERRQ(ierr);
369   ierr = PetscFree(nz);CHKERRQ(ierr);
370 
371   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
372   for (i=0; i<mbs; i++) {
373     rvals[0] = bs*i;
374     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
375     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
376       col = garray[Bbaij->j[j]]*bs;
377       for (k=0; k<bs; k++) {
378 #if defined(PETSC_USE_MAT_SINGLE)
379         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
380 #else
381         atmp = a+j*bs2 + k*bs;
382 #endif
383         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
384         col++;
385       }
386     }
387   }
388 #if defined(PETSC_USE_MAT_SINGLE)
389   ierr = PetscFree(atmp);CHKERRQ(ierr);
390 #endif
391   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
392   baij->garray = 0;
393   ierr = PetscFree(rvals);CHKERRQ(ierr);
394   PetscLogObjectMemory(A,-ec*sizeof(int));
395   ierr = MatDestroy(B);CHKERRQ(ierr);
396   PetscLogObjectParent(A,Bnew);
397   baij->B = Bnew;
398   A->was_assembled = PETSC_FALSE;
399   PetscFunctionReturn(0);
400 }
401 
402 
403 
404