xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 7d6bfa3b9d7db0ccd4cc481237114ca8dbb0dbff)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "../src/mat/impls/baij/mpi/mpibaij.h"
7 
8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
9 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Builds the data structures needed for the parallel
   block matrix-vector product: compacts the off-diagonal part baij->B so that its
   block columns are numbered locally 0..ec-1, records the corresponding global
   block-column numbers in baij->garray, and creates the sequential work vector
   (baij->lvec) and the VecScatter (baij->Mvctx) used to gather the needed
   off-process vector entries.

   Input Parameter:
.  mat - the MPIBAIJ matrix; its off-diagonal part baij->B must already hold the
         (globally numbered) column indices in B->j
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;   /* ec counts the distinct off-diagonal block columns */
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;                        /* 1-based map: global block column + 1 -> local index + 1 */
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* Pass 1: enter each distinct global block column of B into the table,
     handing out consecutive local indices in order of first appearance */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;      /* shift by one: the table cannot hold key/value 0 */
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                                  /* undo the one-based shift */
    garray[lid] = gid;
  }
  /* sort the needed global block columns, then rebuild the table so that
     local indices correspond to the sorted order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: renumber B's column indices from
     global block numbering to the local (compacted) numbering */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; /* B now has only ec block columns */
  ierr = PetscMapSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;         /* count each block column only once */
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need (ascending, since we sweep i = 0..Nbs-1) */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->cmap->n =baij->B->cmap->N  = ec*mat->rmap->bs; /* B now has only ec block columns */
  ierr = PetscMapSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* "from": first point index of each needed global block (garray temporarily scaled by bs) */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;                      /* restore garray to block numbering */
  }

  /* "to": consecutive local blocks 0,1,... of lvec */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context; no array is
     attached (PETSC_NULL) since only the layout is needed */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;                           /* ownership of garray passes to the matrix */
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
142 
143 /*
144      Takes the local part of an already assembled MPIBAIJ matrix
145    and disassembles it. This is to allow new nonzeros into the matrix
146    that require more communication in the matrix vector multiply.
147    Thus certain data-structures must be rebuilt.
148 
149    Kind of slow! But that's what application programmers get when
150    they are sloppy.
151 */
#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIBAIJ"
/*
   DisAssemble_MPIBAIJ - Replaces the compacted off-diagonal part baij->B
   (whose block columns are locally numbered) with a new matrix whose column
   space is the full global width, restoring global column numbering via
   baij->garray, and tears down the matrix-vector-multiply data structures
   (lvec, Mvctx, colmap, garray) so they can be rebuilt after new nonzeros
   are inserted.
*/
PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat            B = baij->B,Bnew;                 /* old and replacement off-diagonal parts */
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
  MatScalar      *a = Bbaij->a;
  MatScalar      *atmp;


  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    baij->colmap = 0;
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over: Bnew gets the same block-row nonzero
     counts as B, but n = A->cmap->N columns so global indices fit directly */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
  /* column-oriented insertion, since each bs2 block in Bbaij->a is stored that way */
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]];                  /* translate local block column back to global */
      atmp = a + j*bs2;                            /* values of the j-th stored bs x bs block */
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  baij->garray = 0;
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  baij->B = Bnew;
  A->was_assembled = PETSC_FALSE;                  /* force full reassembly machinery next time */
  PetscFunctionReturn(0);
}
215 
/*      ugly stuff added for Glenn someday we should fix this up */

static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix; built lazily by
                                      MatMPIBAIJDiagonalScaleLocalSetUp (NOTE(review): never freed in this file,
                                      and being static they cannot serve two MPIBAIJ matrices at once) */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
221 
222 
223 #undef __FUNCT__
224 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
225 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
226 {
227   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
228   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
229   PetscErrorCode ierr;
230   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
231   PetscInt       *r_rmapd,*r_rmapo;
232 
233   PetscFunctionBegin;
234   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
235   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
236   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
237   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
238   nt   = 0;
239   for (i=0; i<inA->bmapping->n; i++) {
240     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
241       nt++;
242       r_rmapd[i] = inA->bmapping->indices[i] + 1;
243     }
244   }
245   if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
246   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
247   for (i=0; i<inA->bmapping->n; i++) {
248     if (r_rmapd[i]){
249       for (j=0; j<bs; j++) {
250         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
251       }
252     }
253   }
254   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
255   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
256 
257   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
258   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
259   for (i=0; i<B->nbs; i++) {
260     lindices[garray[i]] = i+1;
261   }
262   no   = inA->bmapping->n - nt;
263   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
264   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
265   nt   = 0;
266   for (i=0; i<inA->bmapping->n; i++) {
267     if (lindices[inA->bmapping->indices[i]]) {
268       nt++;
269       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
270     }
271   }
272   if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
273   ierr = PetscFree(lindices);CHKERRQ(ierr);
274   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
275   for (i=0; i<inA->bmapping->n; i++) {
276     if (r_rmapo[i]){
277       for (j=0; j<bs; j++) {
278         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
279       }
280     }
281   }
282   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
283   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
284 
285   PetscFunctionReturn(0);
286 }
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
290 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
291 {
292   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
293   PetscErrorCode ierr,(*f)(Mat,Vec);
294 
295   PetscFunctionBegin;
296   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
297   if (f) {
298     ierr = (*f)(A,scale);CHKERRQ(ierr);
299   }
300   PetscFunctionReturn(0);
301 }
302 
303 EXTERN_C_BEGIN
304 #undef __FUNCT__
305 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
306 PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
307 {
308   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
309   PetscErrorCode ierr;
310   PetscInt       n,i;
311   PetscScalar    *d,*o,*s;
312 
313   PetscFunctionBegin;
314   if (!uglyrmapd) {
315     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
316   }
317 
318   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
319 
320   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
321   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
322   for (i=0; i<n; i++) {
323     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
324   }
325   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
326   /* column scale "diagonal" portion of local matrix */
327   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
328 
329   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
330   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
331   for (i=0; i<n; i++) {
332     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
333   }
334   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
335   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
336   /* column scale "off-diagonal" portion of local matrix */
337   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
338 
339   PetscFunctionReturn(0);
340 }
341 EXTERN_C_END
342 
343 
344