xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
8 
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
15   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
16   PetscErrorCode ierr;
17   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
18   PetscInt       bs  = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
19   IS             from,to;
20   Vec            gvec;
21   PetscMPIInt    rank   =sbaij->rank,lsize,size=sbaij->size;
22   PetscInt       *owners=sbaij->rangebs,*ec_owner,k;
23   const PetscInt *sowners;
24   PetscScalar    *ptr;
25 
26   PetscFunctionBegin;
27   ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
28 
29   /* For the first stab we make an array as long as the number of columns */
30   /* mark those columns that are in sbaij->B */
31   ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
32   for (i=0; i<mbs; i++) {
33     for (j=0; j<B->ilen[i]; j++) {
34       if (!indices[aj[B->i[i] + j]]) ec++;
35       indices[aj[B->i[i] + j]] = 1;
36     }
37   }
38 
39   /* form arrays of columns we need */
40   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
41   ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);
42 
43   ec = 0;
44   for (j=0; j<size; j++) {
45     for (i=owners[j]; i<owners[j+1]; i++) {
46       if (indices[i]) {
47         garray[ec]   = i;
48         ec_owner[ec] = j;
49         ec++;
50       }
51     }
52   }
53 
54   /* make indices now point into garray */
55   for (i=0; i<ec; i++) indices[garray[i]] = i;
56 
57   /* compact out the extra columns in B */
58   for (i=0; i<mbs; i++) {
59     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
60   }
61   B->nbs = ec;
62 
63   sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
64 
65   ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
66   ierr = PetscFree(indices);CHKERRQ(ierr);
67 
68   /* create local vector that is used to scatter into */
69   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
70 
71   /* create two temporary index sets for building scatter-gather */
72   ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr);
73   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
74   for (i=0; i<ec; i++) stmp[i] = i;
75   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
76 
77   /* generate the scatter context
78      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
79   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
80   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
81   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
82 
83   sbaij->garray = garray;
84 
85   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr);
86   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr);
87   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
88   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
89 
90   ierr = ISDestroy(&from);CHKERRQ(ierr);
91   ierr = ISDestroy(&to);CHKERRQ(ierr);
92 
93   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
94   lsize = (mbs + ec)*bs;
95   ierr  = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
96   ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
97   ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
98 
99   ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr);
100 
101   /* x index in the IS sfrom */
102   for (i=0; i<ec; i++) {
103     j          = ec_owner[i];
104     sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
105   }
106   /* b index in the IS sfrom */
107   k = sowners[rank]/bs + mbs;
108   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
109   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
110 
111   /* x index in the IS sto */
112   k = sowners[rank]/bs + mbs;
113   for (i=0; i<ec; i++) stmp[i] = (k + i);
114   /* b index in the IS sto */
115   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
116 
117   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
118 
119   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
120 
121   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
122   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
123   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
124   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
125   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
126 
127   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
128   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
129   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
130 
131   ierr = PetscFree(stmp);CHKERRQ(ierr);
132   ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
133 
134   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr);
135   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr);
136   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr);
137   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr);
138   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr);
139   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr);
140   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
141   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
142 
143   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
144   ierr = ISDestroy(&from);CHKERRQ(ierr);
145   ierr = ISDestroy(&to);CHKERRQ(ierr);
146   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 #undef __FUNCT__
151 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
152 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
153 {
154   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
155   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
156   PetscErrorCode ierr;
157   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
158   PetscInt       bs = mat->rmap->bs,*stmp;
159   IS             from,to;
160   Vec            gvec;
161 #if defined(PETSC_USE_CTABLE)
162   PetscTable         gid1_lid1;
163   PetscTablePosition tpos;
164   PetscInt           gid,lid;
165 #else
166   PetscInt Nbs = baij->Nbs,*indices;
167 #endif
168 
169   PetscFunctionBegin;
170 #if defined(PETSC_USE_CTABLE)
171   /* use a table - Mark Adams */
172   PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);
173   for (i=0; i<B->mbs; i++) {
174     for (j=0; j<B->ilen[i]; j++) {
175       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
176       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
177       if (!data) {
178         /* one based table */
179         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
180       }
181     }
182   }
183   /* form array of columns we need */
184   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
185   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
186   while (tpos) {
187     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
188     gid--; lid--;
189     garray[lid] = gid;
190   }
191   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
192   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
193   for (i=0; i<ec; i++) {
194     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
195   }
196   /* compact out the extra columns in B */
197   for (i=0; i<B->mbs; i++) {
198     for (j=0; j<B->ilen[i]; j++) {
199       PetscInt gid1 = aj[B->i[i] + j] + 1;
200       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
201       lid--;
202       aj[B->i[i]+j] = lid;
203     }
204   }
205   B->nbs = ec;
206 
207   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
208 
209   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
210   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
211 #else
212   /* For the first stab we make an array as long as the number of columns */
213   /* mark those columns that are in baij->B */
214   ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
215   for (i=0; i<B->mbs; i++) {
216     for (j=0; j<B->ilen[i]; j++) {
217       if (!indices[aj[B->i[i] + j]]) ec++;
218       indices[aj[B->i[i] + j]] = 1;
219     }
220   }
221 
222   /* form array of columns we need */
223   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
224   ec = 0;
225   for (i=0; i<Nbs; i++) {
226     if (indices[i]) {
227       garray[ec++] = i;
228     }
229   }
230 
231   /* make indices now point into garray */
232   for (i=0; i<ec; i++) indices[garray[i]] = i;
233 
234   /* compact out the extra columns in B */
235   for (i=0; i<B->mbs; i++) {
236     for (j=0; j<B->ilen[i]; j++) {
237       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
238     }
239   }
240   B->nbs = ec;
241 
242   baij->B->cmap->n = ec*mat->rmap->bs;
243 
244   ierr = PetscFree(indices);CHKERRQ(ierr);
245 #endif
246 
247   /* create local vector that is used to scatter into */
248   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
249 
250   /* create two temporary index sets for building scatter-gather */
251   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
252 
253   ierr = PetscMalloc1(ec,&stmp);CHKERRQ(ierr);
254   for (i=0; i<ec; i++) stmp[i] = i;
255   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
256 
257   /* create temporary global vector to generate scatter context */
258   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
259   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
260   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
261 
262   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
263   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
264   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
265   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
266 
267   baij->garray = garray;
268 
269   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
270   ierr = ISDestroy(&from);CHKERRQ(ierr);
271   ierr = ISDestroy(&to);CHKERRQ(ierr);
272   PetscFunctionReturn(0);
273 }
274 
275 /*
276      Takes the local part of an already assembled MPISBAIJ matrix
277    and disassembles it. This is to allow new nonzeros into the matrix
278    that require more communication in the matrix vector multiply.
279    Thus certain data-structures must be rebuilt.
280 
281    Kind of slow! But that's what application programmers get when
282    they are sloppy.
283 */
284 #undef __FUNCT__
285 #define __FUNCT__ "MatDisAssemble_MPISBAIJ"
286 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
287 {
288   Mat_MPISBAIJ   *baij  = (Mat_MPISBAIJ*)A->data;
289   Mat            B      = baij->B,Bnew;
290   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
291   PetscErrorCode ierr;
292   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
293   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
294   MatScalar      *a = Bbaij->a;
295   PetscScalar    *atmp;
296 #if defined(PETSC_USE_REAL_MAT_SINGLE)
297   PetscInt l;
298 #endif
299 
300   PetscFunctionBegin;
301 #if defined(PETSC_USE_REAL_MAT_SINGLE)
302   ierr = PetscMalloc1(A->rmap->bs,&atmp);
303 #endif
304   /* free stuff related to matrix-vec multiply */
305   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
306   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
307   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
308 
309   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
310   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
311   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
312   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
313   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
314 
315   if (baij->colmap) {
316 #if defined(PETSC_USE_CTABLE)
317     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
318 #else
319     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
320     ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
321 #endif
322   }
323 
324   /* make sure that B is assembled so we can access its values */
325   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
326   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
327 
328   /* invent new B and copy stuff over */
329   ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
330   for (i=0; i<mbs; i++) {
331     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
332   }
333   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
334   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
335   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
336   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
337   ierr = PetscFree(nz);CHKERRQ(ierr);
338 
339   ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */
340   /*
341    Ensure that B's nonzerostate is monotonically increasing.
342    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
343    */
344   Bnew->nonzerostate = B->nonzerostate;
345   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
346   for (i=0; i<mbs; i++) {
347     rvals[0] = bs*i;
348     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
349     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
350       col = garray[Bbaij->j[j]]*bs;
351       for (k=0; k<bs; k++) {
352 #if defined(PETSC_USE_REAL_MAT_SINGLE)
353         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
354 #else
355         atmp = a+j*bs2 + k*bs;
356 #endif
357         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
358         col++;
359       }
360     }
361   }
362 #if defined(PETSC_USE_REAL_MAT_SINGLE)
363   ierr = PetscFree(atmp);CHKERRQ(ierr);
364 #endif
365   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
366 
367   baij->garray = 0;
368 
369   ierr = PetscFree(rvals);CHKERRQ(ierr);
370   ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
371   ierr = MatDestroy(&B);CHKERRQ(ierr);
372   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);
373 
374   baij->B = Bnew;
375 
376   A->was_assembled = PETSC_FALSE;
377   PetscFunctionReturn(0);
378 }
379