xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 7d6bfa3b9d7db0ccd4cc481237114ca8dbb0dbff)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
7 
8 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
13 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
14 {
15   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
16   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
17   PetscErrorCode ierr;
18   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
19   PetscInt       bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
20   IS             from,to;
21   Vec            gvec;
22   PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
23   PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
24   PetscScalar    *ptr;
25 
26   PetscFunctionBegin;
27   if (sbaij->sMvctx) {
28     /* This two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */
29     ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
30     sbaij->sMvctx = 0;
31   }
32 
33   /* For the first stab we make an array as long as the number of columns */
34   /* mark those columns that are in sbaij->B */
35   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
36   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
37   for (i=0; i<mbs; i++) {
38     for (j=0; j<B->ilen[i]; j++) {
39       if (!indices[aj[B->i[i] + j]]) ec++;
40       indices[aj[B->i[i] + j] ] = 1;
41     }
42   }
43 
44   /* form arrays of columns we need */
45   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
46   ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr);
47   ec_owner = sgarray + 2*ec;
48 
49   ec = 0;
50   for (j=0; j<size; j++){
51     for (i=owners[j]; i<owners[j+1]; i++){
52       if (indices[i]) {
53         garray[ec]   = i;
54         ec_owner[ec] = j;
55         ec++;
56       }
57     }
58   }
59 
60   /* make indices now point into garray */
61   for (i=0; i<ec; i++) indices[garray[i]] = i;
62 
63   /* compact out the extra columns in B */
64   for (i=0; i<mbs; i++) {
65     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
66   }
67   B->nbs      = ec;
68   sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
69   ierr = PetscMapSetUp((sbaij->B->cmap));CHKERRQ(ierr);
70   ierr = PetscFree(indices);CHKERRQ(ierr);
71 
72   /* create local vector that is used to scatter into */
73   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
74 
75   /* create two temporary index sets for building scatter-gather */
76   ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
77   for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
78   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);
79 
80   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
81   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
82 
83   /* generate the scatter context
84      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
85   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
86   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
87   ierr = VecDestroy(gvec);CHKERRQ(ierr);
88 
89   sbaij->garray = garray;
90   ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
91   ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
92   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
93   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
94 
95   ierr = ISDestroy(from);CHKERRQ(ierr);
96   ierr = ISDestroy(to);CHKERRQ(ierr);
97 
98   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
99   lsize = (mbs + ec)*bs;
100   ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
101   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
102   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
103 
104   sowners = sbaij->slvec0->map->range;
105 
106   /* x index in the IS sfrom */
107   for (i=0; i<ec; i++) {
108     j = ec_owner[i];
109     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
110   }
111   /* b index in the IS sfrom */
112   k = sowners[rank]/bs + mbs;
113   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
114 
115   for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
116   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);
117 
118   /* x index in the IS sto */
119   k = sowners[rank]/bs + mbs;
120   for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
121   /* b index in the IS sto */
122   for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];
123 
124   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);
125 
126   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
127 
128   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
129   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
130   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
131   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
132   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
133 
134   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
135   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
136   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
137 
138   ierr = PetscFree(stmp);CHKERRQ(ierr);
139   ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr);
140 
141   ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
142   ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
143   ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
144   ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
145   ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
146   ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
147   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
148   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
149 
150   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
151   ierr = ISDestroy(from);CHKERRQ(ierr);
152   ierr = ISDestroy(to);CHKERRQ(ierr);
153   ierr = PetscFree(sgarray);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
159 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
160 {
161   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
162   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
163   PetscErrorCode     ierr;
164   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
165   PetscInt           bs = mat->rmap->bs,*stmp;
166   IS                 from,to;
167   Vec                gvec;
168 #if defined (PETSC_USE_CTABLE)
169   PetscTable         gid1_lid1;
170   PetscTablePosition tpos;
171   PetscInt           gid,lid;
172 #else
173   PetscInt           Nbs = baij->Nbs,*indices;
174 #endif
175 
176   PetscFunctionBegin;
177 #if defined (PETSC_USE_CTABLE)
178   /* use a table - Mark Adams */
179   PetscTableCreate(B->mbs,&gid1_lid1);
180   for (i=0; i<B->mbs; i++) {
181     for (j=0; j<B->ilen[i]; j++) {
182       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
183       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
184       if (!data) {
185         /* one based table */
186         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
187       }
188     }
189   }
190   /* form array of columns we need */
191   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
192   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
193   while (tpos) {
194     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
195     gid--; lid--;
196     garray[lid] = gid;
197   }
198   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
199   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
200   for (i=0; i<ec; i++) {
201     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
202   }
203   /* compact out the extra columns in B */
204   for (i=0; i<B->mbs; i++) {
205     for (j=0; j<B->ilen[i]; j++) {
206       PetscInt gid1 = aj[B->i[i] + j] + 1;
207       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
208       lid --;
209       aj[B->i[i]+j] = lid;
210     }
211   }
212   B->nbs     = ec;
213   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
214   ierr = PetscMapSetUp((baij->B->cmap));CHKERRQ(ierr);
215   ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
216   /* Mark Adams */
217 #else
218   /* For the first stab we make an array as long as the number of columns */
219   /* mark those columns that are in baij->B */
220   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
221   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
222   for (i=0; i<B->mbs; i++) {
223     for (j=0; j<B->ilen[i]; j++) {
224       if (!indices[aj[B->i[i] + j]]) ec++;
225       indices[aj[B->i[i] + j] ] = 1;
226     }
227   }
228 
229   /* form array of columns we need */
230   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
231   ec = 0;
232   for (i=0; i<Nbs; i++) {
233     if (indices[i]) {
234       garray[ec++] = i;
235     }
236   }
237 
238   /* make indices now point into garray */
239   for (i=0; i<ec; i++) {
240     indices[garray[i]] = i;
241   }
242 
243   /* compact out the extra columns in B */
244   for (i=0; i<B->mbs; i++) {
245     for (j=0; j<B->ilen[i]; j++) {
246       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
247     }
248   }
249   B->nbs       = ec;
250   baij->B->cmap->n   = ec*mat->rmap->bs;
251   ierr = PetscFree(indices);CHKERRQ(ierr);
252 #endif
253 
254   /* create local vector that is used to scatter into */
255   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
256 
257   /* create two temporary index sets for building scatter-gather */
258   for (i=0; i<ec; i++) {
259     garray[i] = bs*garray[i];
260   }
261   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
262   for (i=0; i<ec; i++) {
263     garray[i] = garray[i]/bs;
264   }
265 
266   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
267   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
268   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
269   ierr = PetscFree(stmp);CHKERRQ(ierr);
270 
271   /* create temporary global vector to generate scatter context */
272   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
273   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
274   ierr = VecDestroy(gvec);CHKERRQ(ierr);
275 
276   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
277   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
278   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
279   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
280   baij->garray = garray;
281   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
282   ierr = ISDestroy(from);CHKERRQ(ierr);
283   ierr = ISDestroy(to);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 /*
288      Takes the local part of an already assembled MPISBAIJ matrix
289    and disassembles it. This is to allow new nonzeros into the matrix
290    that require more communication in the matrix vector multiply.
291    Thus certain data-structures must be rebuilt.
292 
293    Kind of slow! But that's what application programmers get when
294    they are sloppy.
295 */
296 #undef __FUNCT__
297 #define __FUNCT__ "DisAssemble_MPISBAIJ"
298 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
299 {
300   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
301   Mat            B = baij->B,Bnew;
302   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
303   PetscErrorCode ierr;
304   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
305   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
306   MatScalar      *a = Bbaij->a;
307   PetscScalar    *atmp;
308 #if defined(PETSC_USE_MAT_SINGLE)
309   PetscInt       l;
310 #endif
311 
312   PetscFunctionBegin;
313 #if defined(PETSC_USE_MAT_SINGLE)
314   ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp);
315 #endif
316   /* free stuff related to matrix-vec multiply */
317   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
318   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);
319   baij->lvec = 0;
320   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);
321   baij->Mvctx = 0;
322 
323   ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
324   ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
325   baij->slvec0 = 0;
326   ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
327   ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
328   ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
329   baij->slvec1 = 0;
330 
331   if (baij->colmap) {
332 #if defined (PETSC_USE_CTABLE)
333     ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
334 #else
335     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
336     baij->colmap = 0;
337     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
338 #endif
339   }
340 
341   /* make sure that B is assembled so we can access its values */
342   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344 
345   /* invent new B and copy stuff over */
346   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
347   for (i=0; i<mbs; i++) {
348     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
349   }
350   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
351   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
352   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
353   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
354   ierr = PetscFree(nz);CHKERRQ(ierr);
355 
356   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
357   for (i=0; i<mbs; i++) {
358     rvals[0] = bs*i;
359     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
360     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
361       col = garray[Bbaij->j[j]]*bs;
362       for (k=0; k<bs; k++) {
363 #if defined(PETSC_USE_MAT_SINGLE)
364         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
365 #else
366         atmp = a+j*bs2 + k*bs;
367 #endif
368         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
369         col++;
370       }
371     }
372   }
373 #if defined(PETSC_USE_MAT_SINGLE)
374   ierr = PetscFree(atmp);CHKERRQ(ierr);
375 #endif
376   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
377   baij->garray = 0;
378   ierr = PetscFree(rvals);CHKERRQ(ierr);
379   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
380   ierr = MatDestroy(B);CHKERRQ(ierr);
381   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
382   baij->B = Bnew;
383   A->was_assembled = PETSC_FALSE;
384   PetscFunctionReturn(0);
385 }
386 
387 
388 
389