xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
8 
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
15   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
16   PetscErrorCode ierr;
17   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
18   PetscInt       bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
19   IS             from,to;
20   Vec            gvec;
21   PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
22   PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
23   PetscScalar    *ptr;
24 
25   PetscFunctionBegin;
26   ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
27 
28   /* For the first stab we make an array as long as the number of columns */
29   /* mark those columns that are in sbaij->B */
30   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
31   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
32   for (i=0; i<mbs; i++) {
33     for (j=0; j<B->ilen[i]; j++) {
34       if (!indices[aj[B->i[i] + j]]) ec++;
35       indices[aj[B->i[i] + j] ] = 1;
36     }
37   }
38 
39   /* form arrays of columns we need */
40   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
41   ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr);
42 
43   ec = 0;
44   for (j=0; j<size; j++) {
45     for (i=owners[j]; i<owners[j+1]; i++) {
46       if (indices[i]) {
47         garray[ec]   = i;
48         ec_owner[ec] = j;
49         ec++;
50       }
51     }
52   }
53 
54   /* make indices now point into garray */
55   for (i=0; i<ec; i++) indices[garray[i]] = i;
56 
57   /* compact out the extra columns in B */
58   for (i=0; i<mbs; i++) {
59     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
60   }
61   B->nbs      = ec;
62   sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
63   ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
64   ierr = PetscFree(indices);CHKERRQ(ierr);
65 
66   /* create local vector that is used to scatter into */
67   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
68 
69   /* create two temporary index sets for building scatter-gather */
70   ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
71   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
72   for (i=0; i<ec; i++) { stmp[i] = i; }
73   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
74 
75   /* generate the scatter context
76      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
77   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
78   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
79   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
80 
81   sbaij->garray = garray;
82   ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
83   ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
84   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
85   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
86 
87   ierr = ISDestroy(&from);CHKERRQ(ierr);
88   ierr = ISDestroy(&to);CHKERRQ(ierr);
89 
90   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
91   lsize = (mbs + ec)*bs;
92   ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
93   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
94   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
95 
96   sowners = sbaij->slvec0->map->range;
97 
98   /* x index in the IS sfrom */
99   for (i=0; i<ec; i++) {
100     j = ec_owner[i];
101     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
102   }
103   /* b index in the IS sfrom */
104   k = sowners[rank]/bs + mbs;
105   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
106   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
107 
108   /* x index in the IS sto */
109   k = sowners[rank]/bs + mbs;
110   for (i=0; i<ec; i++) stmp[i] = (k + i);
111   /* b index in the IS sto */
112   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
113 
114   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
115 
116   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
117 
118   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
119   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
120   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
121   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
122   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
123 
124   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
125   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
126   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
127 
128   ierr = PetscFree(stmp);CHKERRQ(ierr);
129   ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr);
130 
131   ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
132   ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
133   ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
134   ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
135   ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
136   ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
137   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
138   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
139 
140   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
141   ierr = ISDestroy(&from);CHKERRQ(ierr);
142   ierr = ISDestroy(&to);CHKERRQ(ierr);
143   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
144   PetscFunctionReturn(0);
145 }
146 
147 #undef __FUNCT__
148 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
149 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
150 {
151   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
152   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
153   PetscErrorCode     ierr;
154   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
155   PetscInt           bs = mat->rmap->bs,*stmp;
156   IS                 from,to;
157   Vec                gvec;
158 #if defined (PETSC_USE_CTABLE)
159   PetscTable         gid1_lid1;
160   PetscTablePosition tpos;
161   PetscInt           gid,lid;
162 #else
163   PetscInt           Nbs = baij->Nbs,*indices;
164 #endif
165 
166   PetscFunctionBegin;
167 #if defined (PETSC_USE_CTABLE)
168   /* use a table - Mark Adams */
169   PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);
170   for (i=0; i<B->mbs; i++) {
171     for (j=0; j<B->ilen[i]; j++) {
172       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
173       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
174       if (!data) {
175         /* one based table */
176         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
177       }
178     }
179   }
180   /* form array of columns we need */
181   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
182   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
183   while (tpos) {
184     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
185     gid--; lid--;
186     garray[lid] = gid;
187   }
188   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
189   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
190   for (i=0; i<ec; i++) {
191     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
192   }
193   /* compact out the extra columns in B */
194   for (i=0; i<B->mbs; i++) {
195     for (j=0; j<B->ilen[i]; j++) {
196       PetscInt gid1 = aj[B->i[i] + j] + 1;
197       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
198       lid --;
199       aj[B->i[i]+j] = lid;
200     }
201   }
202   B->nbs     = ec;
203   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
204   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
205   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
206 #else
207   /* For the first stab we make an array as long as the number of columns */
208   /* mark those columns that are in baij->B */
209   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
210   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
211   for (i=0; i<B->mbs; i++) {
212     for (j=0; j<B->ilen[i]; j++) {
213       if (!indices[aj[B->i[i] + j]]) ec++;
214       indices[aj[B->i[i] + j] ] = 1;
215     }
216   }
217 
218   /* form array of columns we need */
219   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
220   ec = 0;
221   for (i=0; i<Nbs; i++) {
222     if (indices[i]) {
223       garray[ec++] = i;
224     }
225   }
226 
227   /* make indices now point into garray */
228   for (i=0; i<ec; i++) {
229     indices[garray[i]] = i;
230   }
231 
232   /* compact out the extra columns in B */
233   for (i=0; i<B->mbs; i++) {
234     for (j=0; j<B->ilen[i]; j++) {
235       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
236     }
237   }
238   B->nbs       = ec;
239   baij->B->cmap->n   = ec*mat->rmap->bs;
240   ierr = PetscFree(indices);CHKERRQ(ierr);
241 #endif
242 
243   /* create local vector that is used to scatter into */
244   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
245 
246   /* create two temporary index sets for building scatter-gather */
247   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
248 
249   ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
250   for (i=0; i<ec; i++) { stmp[i] = i; }
251   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
252 
253   /* create temporary global vector to generate scatter context */
254   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
255   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
256   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
257 
258   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
259   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
260   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
261   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
262   baij->garray = garray;
263   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
264   ierr = ISDestroy(&from);CHKERRQ(ierr);
265   ierr = ISDestroy(&to);CHKERRQ(ierr);
266   PetscFunctionReturn(0);
267 }
268 
269 /*
270      Takes the local part of an already assembled MPISBAIJ matrix
271    and disassembles it. This is to allow new nonzeros into the matrix
272    that require more communication in the matrix vector multiply.
273    Thus certain data-structures must be rebuilt.
274 
275    Kind of slow! But that's what application programmers get when
276    they are sloppy.
277 */
278 #undef __FUNCT__
279 #define __FUNCT__ "MatDisAssemble_MPISBAIJ"
280 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
281 {
282   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
283   Mat            B = baij->B,Bnew;
284   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
285   PetscErrorCode ierr;
286   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
287   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
288   MatScalar      *a = Bbaij->a;
289   PetscScalar    *atmp;
290 #if defined(PETSC_USE_REAL_MAT_SINGLE)
291   PetscInt       l;
292 #endif
293 
294   PetscFunctionBegin;
295 #if defined(PETSC_USE_REAL_MAT_SINGLE)
296   ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp);
297 #endif
298   /* free stuff related to matrix-vec multiply */
299   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
300   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
301   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
302 
303   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
304   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
305   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
306   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
307   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
308 
309   if (baij->colmap) {
310 #if defined (PETSC_USE_CTABLE)
311     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
312 #else
313     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
314     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
315 #endif
316   }
317 
318   /* make sure that B is assembled so we can access its values */
319   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321 
322   /* invent new B and copy stuff over */
323   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
324   for (i=0; i<mbs; i++) {
325     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
326   }
327   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
328   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
329   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
330   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
331   ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */
332   ierr = PetscFree(nz);CHKERRQ(ierr);
333 
334   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
335   for (i=0; i<mbs; i++) {
336     rvals[0] = bs*i;
337     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
338     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
339       col = garray[Bbaij->j[j]]*bs;
340       for (k=0; k<bs; k++) {
341 #if defined(PETSC_USE_REAL_MAT_SINGLE)
342         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
343 #else
344         atmp = a+j*bs2 + k*bs;
345 #endif
346         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
347         col++;
348       }
349     }
350   }
351 #if defined(PETSC_USE_REAL_MAT_SINGLE)
352   ierr = PetscFree(atmp);CHKERRQ(ierr);
353 #endif
354   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
355   baij->garray = 0;
356   ierr = PetscFree(rvals);CHKERRQ(ierr);
357   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
358   ierr = MatDestroy(&B);CHKERRQ(ierr);
359   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
360   baij->B = Bnew;
361   A->was_assembled = PETSC_FALSE;
362   PetscFunctionReturn(0);
363 }
364 
365 
366 
367