xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 5ed2d596fd9914de76abbaed56c65f4792ae57ae)
1 /*$Id: mmbaij.c,v 1.46 2001/09/25 00:31:36 balay Exp $*/
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
13 int MatSetUpMultiply_MPIBAIJ(Mat mat)
14 {
15   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
16   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
17   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
18   int                bs = baij->bs,*stmp;
19   IS                 from,to;
20   Vec                gvec;
21 #if defined (PETSC_USE_CTABLE)
22   PetscTable         gid1_lid1;
23   PetscTablePosition tpos;
24   int                gid,lid;
25 #endif
26 
27   PetscFunctionBegin;
28 
29 #if defined (PETSC_USE_CTABLE)
30   /* use a table - Mark Adams */
31   ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
32   for (i=0; i<B->mbs; i++) {
33     for (j=0; j<B->ilen[i]; j++) {
34       int data,gid1 = aj[B->i[i]+j] + 1;
35       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
36       if (!data) {
37         /* one based table */
38         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
39       }
40     }
41   }
42   /* form array of columns we need */
43   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
44   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
45   while (tpos) {
46     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
47     gid--; lid--;
48     garray[lid] = gid;
49   }
50   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
51   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
52   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
53   for (i=0; i<ec; i++) {
54     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
55   }
56   /* compact out the extra columns in B */
57   for (i=0; i<B->mbs; i++) {
58     for (j=0; j<B->ilen[i]; j++) {
59       int gid1 = aj[B->i[i] + j] + 1;
60       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
61       lid --;
62       aj[B->i[i]+j] = lid;
63     }
64   }
65   B->nbs     = ec;
66   baij->B->n = ec*B->bs;
67   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
68   /* Mark Adams */
69 #else
70   /* Make an array as long as the number of columns */
71   /* mark those columns that are in baij->B */
72   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
73   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
74   for (i=0; i<B->mbs; i++) {
75     for (j=0; j<B->ilen[i]; j++) {
76       if (!indices[aj[B->i[i] + j]]) ec++;
77       indices[aj[B->i[i] + j]] = 1;
78     }
79   }
80 
81   /* form array of columns we need */
82   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
83   ec = 0;
84   for (i=0; i<Nbs; i++) {
85     if (indices[i]) {
86       garray[ec++] = i;
87     }
88   }
89 
90   /* make indices now point into garray */
91   for (i=0; i<ec; i++) {
92     indices[garray[i]] = i;
93   }
94 
95   /* compact out the extra columns in B */
96   for (i=0; i<B->mbs; i++) {
97     for (j=0; j<B->ilen[i]; j++) {
98       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
99     }
100   }
101   B->nbs       = ec;
102   baij->B->n   = ec*B->bs;
103   ierr = PetscFree(indices);CHKERRQ(ierr);
104 #endif
105 
106   /* create local vector that is used to scatter into */
107   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
108 
109   /* create two temporary index sets for building scatter-gather */
110   for (i=0; i<ec; i++) {
111     garray[i] = bs*garray[i];
112   }
113   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
114   for (i=0; i<ec; i++) {
115     garray[i] = garray[i]/bs;
116   }
117 
118   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
119   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
120   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
121   ierr = PetscFree(stmp);CHKERRQ(ierr);
122 
123   /* create temporary global vector to generate scatter context */
124   /* this is inefficient, but otherwise we must do either
125      1) save garray until the first actual scatter when the vector is known or
126      2) have another way of generating a scatter context without a vector.*/
127   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
128 
129   /* gnerate the scatter context */
130   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
131 
132   /*
133       Post the receives for the first matrix vector product. We sync-chronize after
134     this on the chance that the user immediately calls MatMult() after assemblying
135     the matrix.
136   */
137   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
138   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
139 
140   PetscLogObjectParent(mat,baij->Mvctx);
141   PetscLogObjectParent(mat,baij->lvec);
142   PetscLogObjectParent(mat,from);
143   PetscLogObjectParent(mat,to);
144   baij->garray = garray;
145   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
146   ierr = ISDestroy(from);CHKERRQ(ierr);
147   ierr = ISDestroy(to);CHKERRQ(ierr);
148   ierr = VecDestroy(gvec);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*
153      Takes the local part of an already assembled MPIBAIJ matrix
154    and disassembles it. This is to allow new nonzeros into the matrix
155    that require more communication in the matrix vector multiply.
156    Thus certain data-structures must be rebuilt.
157 
158    Kind of slow! But that's what application programmers get when
159    they are sloppy.
160 */
161 #undef __FUNCT__
162 #define __FUNCT__ "DisAssemble_MPIBAIJ"
163 int DisAssemble_MPIBAIJ(Mat A)
164 {
165   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)A->data;
166   Mat          B = baij->B,Bnew;
167   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ*)B->data;
168   int          ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
169   int          bs2 = baij->bs2,*nz,ec,m = A->m;
170   MatScalar    *a = Bbaij->a;
171   PetscScalar  *atmp;
172 #if defined(PETSC_USE_MAT_SINGLE)
173   int          k;
174 #endif
175 
176   PetscFunctionBegin;
177   /* free stuff related to matrix-vec multiply */
178   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
179   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
180   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
181   if (baij->colmap) {
182 #if defined (PETSC_USE_CTABLE)
183     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
184 #else
185     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
186     baij->colmap = 0;
187     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
188 #endif
189   }
190 
191   /* make sure that B is assembled so we can access its values */
192   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
193   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
194 
195   /* invent new B and copy stuff over */
196   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
197   for (i=0; i<mbs; i++) {
198     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
199   }
200   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
201   ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
202 
203 #if defined(PETSC_USE_MAT_SINGLE)
204   ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
205 #endif
206     for (i=0; i<mbs; i++) {
207       for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
208         col  = garray[Bbaij->j[j]];
209 #if defined(PETSC_USE_MAT_SINGLE)
210         for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
211 #else
212         atmp = a + j*bs2;
213 #endif
214         ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
215       }
216     }
217   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);
218 
219 #if defined(PETSC_USE_MAT_SINGLE)
220   ierr = PetscFree(atmp);CHKERRQ(ierr);
221 #endif
222 
223   ierr = PetscFree(nz);CHKERRQ(ierr);
224   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
225   baij->garray = 0;
226   PetscLogObjectMemory(A,-ec*sizeof(int));
227   ierr = MatDestroy(B);CHKERRQ(ierr);
228   PetscLogObjectParent(A,Bnew);
229   baij->B = Bnew;
230   A->was_assembled = PETSC_FALSE;
231   PetscFunctionReturn(0);
232 }
233 
234 /*      ugly stuff added for Glenn someday we should fix this up */
235 
236 int *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
237                                       parts of the local matrix */
238 Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
239 
240 
241 #undef __FUNC__
242 #define __FUNC__ "MatMPIBAIJDiagonalScaleLocalSetUp"
243 int MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
244 {
245   Mat_MPIBAIJ  *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
246   Mat_SeqBAIJ  *A = (Mat_SeqBAIJ*)ina->A->data;
247   Mat_SeqBAIJ  *B = (Mat_SeqBAIJ*)ina->B->data;
248   int          ierr,bs = A->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
249   int          *r_rmapd,*r_rmapo;
250 
251   PetscFunctionBegin;
252   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
253   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
254   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr);
255   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
256   nt   = 0;
257   for (i=0; i<inA->bmapping->n; i++) {
258     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
259       nt++;
260       r_rmapd[i] = inA->bmapping->indices[i] + 1;
261     }
262   }
263   if (nt*bs != n) SETERRQ2(1,"Hmm nt*bs %d n %d",nt*bs,n);
264   ierr = PetscMalloc((n+1)*sizeof(int),&uglyrmapd);CHKERRQ(ierr);
265   for (i=0; i<inA->bmapping->n; i++) {
266     if (r_rmapd[i]){
267       for (j=0; j<bs; j++) {
268         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
269       }
270     }
271   }
272   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
273   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
274 
275   ierr = PetscMalloc((ina->Nbs+1)*sizeof(int),&lindices);CHKERRQ(ierr);
276   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(int));CHKERRQ(ierr);
277   for (i=0; i<B->nbs; i++) {
278     lindices[garray[i]] = i+1;
279   }
280   no   = inA->bmapping->n - nt;
281   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr);
282   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
283   nt   = 0;
284   for (i=0; i<inA->bmapping->n; i++) {
285     if (lindices[inA->bmapping->indices[i]]) {
286       nt++;
287       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
288     }
289   }
290   if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,n);
291   ierr = PetscFree(lindices);CHKERRQ(ierr);
292   ierr = PetscMalloc((nt*bs+1)*sizeof(int),&uglyrmapo);CHKERRQ(ierr);
293   for (i=0; i<inA->bmapping->n; i++) {
294     if (r_rmapo[i]){
295       for (j=0; j<bs; j++) {
296         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
297       }
298     }
299   }
300   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
301   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
302 
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNC__
307 #define __FUNC__ "MatMPIBAIJDiagonalScaleLocal"
308 int MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
309 {
310   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
311   int          ierr,n,i;
312   PetscScalar  *d,*o,*s;
313 
314   PetscFunctionBegin;
315   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
316 
317   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
318   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
319   for (i=0; i<n; i++) {
320     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
321   }
322   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
323   /* column scale "diagonal" portion of local matrix */
324   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
325 
326   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
327   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
328   for (i=0; i<n; i++) {
329     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
330   }
331   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
332   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
333   /* column scale "off-diagonal" portion of local matrix */
334   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
335 
336   PetscFunctionReturn(0);
337 }
338 
339 
340 
341