xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision e2df7a95c5ea77c899beea10ff9effd6061e7c8f)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7 
8 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 
10 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication structures needed by
   the parallel SBAIJ matrix-vector product.

   The off-diagonal part B arrives holding global block-column indices; this
   routine compacts them to local indices (saving the global ones in
   sbaij->garray), creates the local work vector lvec and scatter Mvctx
   (not used by MatMult_MPISBAIJ itself, but useful for some applications),
   and builds the combined vectors slvec0/slvec1 and scatter sMvctx that
   MatMult_MPISBAIJ uses to communicate both x values and partial products
   in a single scatter.

   Collective on mat's communicator (creates MPI vectors and scatters).
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
  PetscInt       *owners=sbaij->rowners,*sowners,*ec_owner,k;
  PetscMap       vecmap;
  PetscScalar    *ptr;

  PetscFunctionBegin;
  /* discard scatter structures left over from a previous assembly */
  if (sbaij->lvec) {
    ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
    sbaij->lvec = 0;
  }
  if (sbaij->Mvctx) {
    ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
    sbaij->Mvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      /* ec counts the distinct off-diagonal block columns referenced by B */
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr);
  /* last third of sgarray stores the owning rank of each needed column */
  ec_owner = sgarray + 2*ec;

  /* garray[] lists the needed global block columns in increasing order,
     grouped by owning process; ec_owner[] records each column's owner */
  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: global block columns become local */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs      = ec;
  sbaij->B->n = ec*mat->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather;
     stmp is sized 2*ec because it is reused below for the sMvctx index sets */
  ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     each process holds mbs local blocks followed by ec ghost blocks */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);

  /* x index in the IS sfrom: shift garray[] from the matrix numbering into
     slvec0's numbering, where each process's ghost segment follows its x segment */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: this process's own ghost segment starts at block k */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  /* generate the SBAIJ scatter context */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);

  /* slvec1a/slvec1b alias the local (x) and ghost (b) portions of slvec1's
     storage; slvec0b aliases the ghost portion of slvec0 */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  ierr = PetscFree(sgarray);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
173 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
174 {
175   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
176   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
177   PetscErrorCode     ierr;
178   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
179   PetscInt           bs = mat->bs,*stmp;
180   IS                 from,to;
181   Vec                gvec;
182 #if defined (PETSC_USE_CTABLE)
183   PetscTable         gid1_lid1;
184   PetscTablePosition tpos;
185   PetscInt           gid,lid;
186 #else
187   PetscInt           Nbs = baij->Nbs,*indices;
188 #endif
189 
190   PetscFunctionBegin;
191 
192 #if defined (PETSC_USE_CTABLE)
193   /* use a table - Mark Adams */
194   PetscTableCreate(B->mbs,&gid1_lid1);
195   for (i=0; i<B->mbs; i++) {
196     for (j=0; j<B->ilen[i]; j++) {
197       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
198       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
199       if (!data) {
200         /* one based table */
201         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
202       }
203     }
204   }
205   /* form array of columns we need */
206   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
207   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
208   while (tpos) {
209     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
210     gid--; lid--;
211     garray[lid] = gid;
212   }
213   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
214   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
215   for (i=0; i<ec; i++) {
216     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
217   }
218   /* compact out the extra columns in B */
219   for (i=0; i<B->mbs; i++) {
220     for (j=0; j<B->ilen[i]; j++) {
221       PetscInt gid1 = aj[B->i[i] + j] + 1;
222       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
223       lid --;
224       aj[B->i[i]+j] = lid;
225     }
226   }
227   B->nbs     = ec;
228   baij->B->n = ec*mat->bs;
229   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
230   /* Mark Adams */
231 #else
232   /* For the first stab we make an array as long as the number of columns */
233   /* mark those columns that are in baij->B */
234   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
235   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
236   for (i=0; i<B->mbs; i++) {
237     for (j=0; j<B->ilen[i]; j++) {
238       if (!indices[aj[B->i[i] + j]]) ec++;
239       indices[aj[B->i[i] + j] ] = 1;
240     }
241   }
242 
243   /* form array of columns we need */
244   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
245   ec = 0;
246   for (i=0; i<Nbs; i++) {
247     if (indices[i]) {
248       garray[ec++] = i;
249     }
250   }
251 
252   /* make indices now point into garray */
253   for (i=0; i<ec; i++) {
254     indices[garray[i]] = i;
255   }
256 
257   /* compact out the extra columns in B */
258   for (i=0; i<B->mbs; i++) {
259     for (j=0; j<B->ilen[i]; j++) {
260       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
261     }
262   }
263   B->nbs       = ec;
264   baij->B->n   = ec*mat->bs;
265   ierr = PetscFree(indices);CHKERRQ(ierr);
266 #endif
267 
268   /* create local vector that is used to scatter into */
269   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
270 
271   /* create two temporary index sets for building scatter-gather */
272   for (i=0; i<ec; i++) {
273     garray[i] = bs*garray[i];
274   }
275   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
276   for (i=0; i<ec; i++) {
277     garray[i] = garray[i]/bs;
278   }
279 
280   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
281   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
282   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
283   ierr = PetscFree(stmp);CHKERRQ(ierr);
284 
285   /* create temporary global vector to generate scatter context */
286   /* this is inefficient, but otherwise we must do either
287      1) save garray until the first actual scatter when the vector is known or
288      2) have another way of generating a scatter context without a vector.*/
289   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
290 
291   /* gnerate the scatter context */
292   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
293 
294   /*
295       Post the receives for the first matrix vector product. We sync-chronize after
296     this on the chance that the user immediately calls MatMult() after assemblying
297     the matrix.
298   */
299   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
300   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
301 
302   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
303   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
304   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
305   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
306   baij->garray = garray;
307   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
308   ierr = ISDestroy(from);CHKERRQ(ierr);
309   ierr = ISDestroy(to);CHKERRQ(ierr);
310   ierr = VecDestroy(gvec);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 
314 
315 /*
316      Takes the local part of an already assembled MPIBAIJ matrix
317    and disassembles it. This is to allow new nonzeros into the matrix
318    that require more communication in the matrix vector multiply.
319    Thus certain data-structures must be rebuilt.
320 
321    Kind of slow! But that's what application programmers get when
322    they are sloppy.
323 */
324 #undef __FUNCT__
325 #define __FUNCT__ "DisAssemble_MPISBAIJ"
326 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
327 {
328   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
329   Mat           B = baij->B,Bnew;
330   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
331   PetscErrorCode ierr;
332   PetscInt i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
333   PetscInt           k,bs=A->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
334   MatScalar     *a = Bbaij->a;
335   PetscScalar   *atmp;
336 #if defined(PETSC_USE_MAT_SINGLE)
337   PetscInt           l;
338 #endif
339 
340   PetscFunctionBegin;
341 #if defined(PETSC_USE_MAT_SINGLE)
342   ierr = PetscMalloc(A->bs*sizeof(PetscScalar),&atmp);
343 #endif
344   /* free stuff related to matrix-vec multiply */
345   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
346   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
347   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
348   if (baij->colmap) {
349 #if defined (PETSC_USE_CTABLE)
350     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
351 #else
352     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
353     baij->colmap = 0;
354     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
355 #endif
356   }
357 
358   /* make sure that B is assembled so we can access its values */
359   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
360   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
361 
362   /* invent new B and copy stuff over */
363   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
364   for (i=0; i<mbs; i++) {
365     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
366   }
367   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
368   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
369   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
370   ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr);
371   ierr = PetscFree(nz);CHKERRQ(ierr);
372 
373   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
374   for (i=0; i<mbs; i++) {
375     rvals[0] = bs*i;
376     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
377     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
378       col = garray[Bbaij->j[j]]*bs;
379       for (k=0; k<bs; k++) {
380 #if defined(PETSC_USE_MAT_SINGLE)
381         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
382 #else
383         atmp = a+j*bs2 + k*bs;
384 #endif
385         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
386         col++;
387       }
388     }
389   }
390 #if defined(PETSC_USE_MAT_SINGLE)
391   ierr = PetscFree(atmp);CHKERRQ(ierr);
392 #endif
393   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
394   baij->garray = 0;
395   ierr = PetscFree(rvals);CHKERRQ(ierr);
396   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
397   ierr = MatDestroy(B);CHKERRQ(ierr);
398   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
399   baij->B = Bnew;
400   A->was_assembled = PETSC_FALSE;
401   PetscFunctionReturn(0);
402 }
403 
404 
405 
406