xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 94ef8dde638caef1d0cd84a7dc8a2db65fcda8b6)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
8 {
9   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
10   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
11   PetscErrorCode ierr;
12   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
13   PetscInt       bs  = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
14   IS             from,to;
15   Vec            gvec;
16   PetscMPIInt    rank   =sbaij->rank,lsize,size=sbaij->size;
17   PetscInt       *owners=sbaij->rangebs,*ec_owner,k;
18   const PetscInt *sowners;
19   PetscScalar    *ptr;
20 
21   PetscFunctionBegin;
22   ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
23 
24   /* For the first stab we make an array as long as the number of columns */
25   /* mark those columns that are in sbaij->B */
26   ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
27   for (i=0; i<mbs; i++) {
28     for (j=0; j<B->ilen[i]; j++) {
29       if (!indices[aj[B->i[i] + j]]) ec++;
30       indices[aj[B->i[i] + j]] = 1;
31     }
32   }
33 
34   /* form arrays of columns we need */
35   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
36   ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);
37 
38   ec = 0;
39   for (j=0; j<size; j++) {
40     for (i=owners[j]; i<owners[j+1]; i++) {
41       if (indices[i]) {
42         garray[ec]   = i;
43         ec_owner[ec] = j;
44         ec++;
45       }
46     }
47   }
48 
49   /* make indices now point into garray */
50   for (i=0; i<ec; i++) indices[garray[i]] = i;
51 
52   /* compact out the extra columns in B */
53   for (i=0; i<mbs; i++) {
54     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
55   }
56   B->nbs = ec;
57 
58   sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
59 
60   ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
61   ierr = PetscFree(indices);CHKERRQ(ierr);
62 
63   /* create local vector that is used to scatter into */
64   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
65 
66   /* create two temporary index sets for building scatter-gather */
67   ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr);
68   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
69   for (i=0; i<ec; i++) stmp[i] = i;
70   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
71 
72   /* generate the scatter context
73      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
74   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
75   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
76   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
77 
78   sbaij->garray = garray;
79 
80   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr);
81   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr);
82   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
83   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
84 
85   ierr = ISDestroy(&from);CHKERRQ(ierr);
86   ierr = ISDestroy(&to);CHKERRQ(ierr);
87 
88   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
89   lsize = (mbs + ec)*bs;
90   ierr  = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
91   ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
92   ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
93 
94   ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr);
95 
96   /* x index in the IS sfrom */
97   for (i=0; i<ec; i++) {
98     j          = ec_owner[i];
99     sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
100   }
101   /* b index in the IS sfrom */
102   k = sowners[rank]/bs + mbs;
103   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
104   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
105 
106   /* x index in the IS sto */
107   k = sowners[rank]/bs + mbs;
108   for (i=0; i<ec; i++) stmp[i] = (k + i);
109   /* b index in the IS sto */
110   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
111 
112   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
113 
114   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
115 
116   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
117   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
118   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
119   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
120   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
121 
122   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
123   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
124   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
125 
126   ierr = PetscFree(stmp);CHKERRQ(ierr);
127   ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
128 
129   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr);
130   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr);
131   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr);
132   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr);
133   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr);
134   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr);
135   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
136   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
137 
138   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
139   ierr = ISDestroy(&from);CHKERRQ(ierr);
140   ierr = ISDestroy(&to);CHKERRQ(ierr);
141   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 
145 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
146 {
147   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
148   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
149   PetscErrorCode ierr;
150   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
151   PetscInt       bs = mat->rmap->bs,*stmp;
152   IS             from,to;
153   Vec            gvec;
154 #if defined(PETSC_USE_CTABLE)
155   PetscTable         gid1_lid1;
156   PetscTablePosition tpos;
157   PetscInt           gid,lid;
158 #else
159   PetscInt Nbs = baij->Nbs,*indices;
160 #endif
161 
162   PetscFunctionBegin;
163 #if defined(PETSC_USE_CTABLE)
164   /* use a table - Mark Adams */
165   PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);
166   for (i=0; i<B->mbs; i++) {
167     for (j=0; j<B->ilen[i]; j++) {
168       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
169       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
170       if (!data) {
171         /* one based table */
172         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
173       }
174     }
175   }
176   /* form array of columns we need */
177   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
178   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
179   while (tpos) {
180     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
181     gid--; lid--;
182     garray[lid] = gid;
183   }
184   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
185   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
186   for (i=0; i<ec; i++) {
187     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
188   }
189   /* compact out the extra columns in B */
190   for (i=0; i<B->mbs; i++) {
191     for (j=0; j<B->ilen[i]; j++) {
192       PetscInt gid1 = aj[B->i[i] + j] + 1;
193       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
194       lid--;
195       aj[B->i[i]+j] = lid;
196     }
197   }
198   B->nbs = ec;
199 
200   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
201 
202   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
203   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
204 #else
205   /* For the first stab we make an array as long as the number of columns */
206   /* mark those columns that are in baij->B */
207   ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
208   for (i=0; i<B->mbs; i++) {
209     for (j=0; j<B->ilen[i]; j++) {
210       if (!indices[aj[B->i[i] + j]]) ec++;
211       indices[aj[B->i[i] + j]] = 1;
212     }
213   }
214 
215   /* form array of columns we need */
216   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
217   ec = 0;
218   for (i=0; i<Nbs; i++) {
219     if (indices[i]) {
220       garray[ec++] = i;
221     }
222   }
223 
224   /* make indices now point into garray */
225   for (i=0; i<ec; i++) indices[garray[i]] = i;
226 
227   /* compact out the extra columns in B */
228   for (i=0; i<B->mbs; i++) {
229     for (j=0; j<B->ilen[i]; j++) {
230       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
231     }
232   }
233   B->nbs = ec;
234 
235   baij->B->cmap->n = ec*mat->rmap->bs;
236 
237   ierr = PetscFree(indices);CHKERRQ(ierr);
238 #endif
239 
240   /* create local vector that is used to scatter into */
241   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
242 
243   /* create two temporary index sets for building scatter-gather */
244   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
245 
246   ierr = PetscMalloc1(ec,&stmp);CHKERRQ(ierr);
247   for (i=0; i<ec; i++) stmp[i] = i;
248   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
249 
250   /* create temporary global vector to generate scatter context */
251   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
252   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
253   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
254 
255   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
256   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
257   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
258   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
259 
260   baij->garray = garray;
261 
262   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
263   ierr = ISDestroy(&from);CHKERRQ(ierr);
264   ierr = ISDestroy(&to);CHKERRQ(ierr);
265   PetscFunctionReturn(0);
266 }
267 
268 /*
269      Takes the local part of an already assembled MPISBAIJ matrix
270    and disassembles it. This is to allow new nonzeros into the matrix
271    that require more communication in the matrix vector multiply.
272    Thus certain data-structures must be rebuilt.
273 
274    Kind of slow! But that's what application programmers get when
275    they are sloppy.
276 */
277 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
278 {
279   Mat_MPISBAIJ   *baij  = (Mat_MPISBAIJ*)A->data;
280   Mat            B      = baij->B,Bnew;
281   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
282   PetscErrorCode ierr;
283   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
284   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
285   MatScalar      *a = Bbaij->a;
286   PetscScalar    *atmp;
287 #if defined(PETSC_USE_REAL_MAT_SINGLE)
288   PetscInt l;
289 #endif
290 
291   PetscFunctionBegin;
292 #if defined(PETSC_USE_REAL_MAT_SINGLE)
293   ierr = PetscMalloc1(A->rmap->bs,&atmp);
294 #endif
295   /* free stuff related to matrix-vec multiply */
296   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
297   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
298   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
299 
300   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
301   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
302   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
303   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
304   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
305 
306   if (baij->colmap) {
307 #if defined(PETSC_USE_CTABLE)
308     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
309 #else
310     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
311     ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
312 #endif
313   }
314 
315   /* make sure that B is assembled so we can access its values */
316   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
317   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
318 
319   /* invent new B and copy stuff over */
320   ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
321   for (i=0; i<mbs; i++) {
322     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
323   }
324   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
325   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
326   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
327   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
328   ierr = PetscFree(nz);CHKERRQ(ierr);
329 
330   ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */
331   /*
332    Ensure that B's nonzerostate is monotonically increasing.
333    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
334    */
335   Bnew->nonzerostate = B->nonzerostate;
336   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
337   for (i=0; i<mbs; i++) {
338     rvals[0] = bs*i;
339     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
340     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
341       col = garray[Bbaij->j[j]]*bs;
342       for (k=0; k<bs; k++) {
343 #if defined(PETSC_USE_REAL_MAT_SINGLE)
344         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
345 #else
346         atmp = a+j*bs2 + k*bs;
347 #endif
348         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
349         col++;
350       }
351     }
352   }
353 #if defined(PETSC_USE_REAL_MAT_SINGLE)
354   ierr = PetscFree(atmp);CHKERRQ(ierr);
355 #endif
356   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
357 
358   baij->garray = 0;
359 
360   ierr = PetscFree(rvals);CHKERRQ(ierr);
361   ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
362   ierr = MatDestroy(&B);CHKERRQ(ierr);
363   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);
364 
365   baij->B = Bnew;
366 
367   A->was_assembled = PETSC_FALSE;
368   PetscFunctionReturn(0);
369 }
370