xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
1 /*
2    Support for the parallel BAIJ matrix vector multiply
3 */
4 #include "src/mat/impls/baij/mpi/mpibaij.h"
5 
6 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
7 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   Sets up the communication structures needed for the parallel BAIJ
   matrix-vector product: determines which nonlocal block columns of the
   off-diagonal part B are actually referenced, renumbers B's column
   indices into that compact local numbering, and builds the scatter
   context (baij->Mvctx) that gathers the needed remote vector entries
   into the sequential work vector baij->lvec.

   On exit baij->garray[i] holds the original global block-column index
   of local column i of B.
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  int                i,j,*aj = B->j,ec = 0,*garray;
  int                bs = baij->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  int                gid,lid;
#else
  int                Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* The table maps (global block column + 1) -> (local column + 1);
     the +1 offsets keep zero free to mean "not present". */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  /* sort the needed global columns, then rebuild the table so local
     indices follow the sorted order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->n   = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* "from" needs global point indices (block index * bs); garray is
     scaled in place for the call, then restored to block indices */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,baij->Mvctx);
  PetscLogObjectParent(mat,baij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  baij->garray = garray;
  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
150 
151 /*
152      Takes the local part of an already assembled MPIBAIJ matrix
153    and disassembles it. This is to allow new nonzeros into the matrix
154    that require more communication in the matrix vector multiply.
155    Thus certain data-structures must be rebuilt.
156 
157    Kind of slow! But that's what application programmers get when
158    they are sloppy.
159 */
160 #undef __FUNCT__
161 #define __FUNCT__ "DisAssemble_MPIBAIJ"
162 PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
163 {
164   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)A->data;
165   Mat          B = baij->B,Bnew;
166   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ*)B->data;
167   PetscErrorCode ierr;
168   int i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
169   int          bs2 = baij->bs2,*nz,ec,m = A->m;
170   MatScalar    *a = Bbaij->a;
171   PetscScalar  *atmp;
172 #if defined(PETSC_USE_MAT_SINGLE)
173   int          k;
174 #endif
175 
176   PetscFunctionBegin;
177   /* free stuff related to matrix-vec multiply */
178   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
179   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
180   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
181   if (baij->colmap) {
182 #if defined (PETSC_USE_CTABLE)
183     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
184 #else
185     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
186     baij->colmap = 0;
187     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
188 #endif
189   }
190 
191   /* make sure that B is assembled so we can access its values */
192   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
193   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
194 
195   /* invent new B and copy stuff over */
196   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
197   for (i=0; i<mbs; i++) {
198     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
199   }
200   ierr = MatCreate(B->comm,m,n,m,n,&Bnew);CHKERRQ(ierr);
201   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
202   ierr = MatSeqBAIJSetPreallocation(Bnew,baij->bs,0,nz);CHKERRQ(ierr);
203   ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
204 
205 #if defined(PETSC_USE_MAT_SINGLE)
206   ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
207 #endif
208     for (i=0; i<mbs; i++) {
209       for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
210         col  = garray[Bbaij->j[j]];
211 #if defined(PETSC_USE_MAT_SINGLE)
212         for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
213 #else
214         atmp = a + j*bs2;
215 #endif
216         ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
217       }
218     }
219   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);
220 
221 #if defined(PETSC_USE_MAT_SINGLE)
222   ierr = PetscFree(atmp);CHKERRQ(ierr);
223 #endif
224 
225   ierr = PetscFree(nz);CHKERRQ(ierr);
226   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
227   baij->garray = 0;
228   PetscLogObjectMemory(A,-ec*sizeof(int));
229   ierr = MatDestroy(B);CHKERRQ(ierr);
230   PetscLogObjectParent(A,Bnew);
231   baij->B = Bnew;
232   A->was_assembled = PETSC_FALSE;
233   PetscFunctionReturn(0);
234 }
235 
236 /*      ugly stuff added for Glenn someday we should fix this up */
237 
238 static int *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
239                                       parts of the local matrix */
240 static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
241 
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
245 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
246 {
247   Mat_MPIBAIJ  *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
248   Mat_SeqBAIJ  *A = (Mat_SeqBAIJ*)ina->A->data;
249   Mat_SeqBAIJ  *B = (Mat_SeqBAIJ*)ina->B->data;
250   PetscErrorCode ierr;
251   int bs = A->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
252   int          *r_rmapd,*r_rmapo;
253 
254   PetscFunctionBegin;
255   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
256   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
257   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr);
258   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
259   nt   = 0;
260   for (i=0; i<inA->bmapping->n; i++) {
261     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
262       nt++;
263       r_rmapd[i] = inA->bmapping->indices[i] + 1;
264     }
265   }
266   if (nt*bs != n) SETERRQ2(1,"Hmm nt*bs %d n %d",nt*bs,n);
267   ierr = PetscMalloc((n+1)*sizeof(int),&uglyrmapd);CHKERRQ(ierr);
268   for (i=0; i<inA->bmapping->n; i++) {
269     if (r_rmapd[i]){
270       for (j=0; j<bs; j++) {
271         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
272       }
273     }
274   }
275   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
276   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
277 
278   ierr = PetscMalloc((ina->Nbs+1)*sizeof(int),&lindices);CHKERRQ(ierr);
279   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(int));CHKERRQ(ierr);
280   for (i=0; i<B->nbs; i++) {
281     lindices[garray[i]] = i+1;
282   }
283   no   = inA->bmapping->n - nt;
284   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr);
285   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
286   nt   = 0;
287   for (i=0; i<inA->bmapping->n; i++) {
288     if (lindices[inA->bmapping->indices[i]]) {
289       nt++;
290       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
291     }
292   }
293   if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,n);
294   ierr = PetscFree(lindices);CHKERRQ(ierr);
295   ierr = PetscMalloc((nt*bs+1)*sizeof(int),&uglyrmapo);CHKERRQ(ierr);
296   for (i=0; i<inA->bmapping->n; i++) {
297     if (r_rmapo[i]){
298       for (j=0; j<bs; j++) {
299         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
300       }
301     }
302   }
303   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
304   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
305 
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
311 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
312 {
313   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
314   PetscErrorCode ierr,(*f)(Mat,Vec);
315 
316   PetscFunctionBegin;
317   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
318   if (f) {
319     ierr = (*f)(A,scale);CHKERRQ(ierr);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 EXTERN_C_BEGIN
325 #undef __FUNCT__
326 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
327 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
328 {
329   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
330   PetscErrorCode ierr;
331   int n,i;
332   PetscScalar  *d,*o,*s;
333 
334   PetscFunctionBegin;
335   if (!uglyrmapd) {
336     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
337   }
338 
339   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
340 
341   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
342   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
343   for (i=0; i<n; i++) {
344     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
345   }
346   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
347   /* column scale "diagonal" portion of local matrix */
348   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
349 
350   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
351   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
352   for (i=0; i<n; i++) {
353     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
354   }
355   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
356   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
357   /* column scale "off-diagonal" portion of local matrix */
358   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
359 
360   PetscFunctionReturn(0);
361 }
362 EXTERN_C_END
363 
364 
365