xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 /*
3    Support for the parallel BAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/baij/mpi/mpibaij.h>
6 
7 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
8 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   Builds the data structures needed to apply the off-diagonal part of an
   MPIBAIJ matrix during MatMult():
     - garray[]: sorted list of the global block-column indices that actually
       appear in the off-diagonal block B (stored into baij->garray)
     - compacts B's column indices from global numbering to local 0..ec-1
     - baij->lvec: sequential work vector that receives the needed ghost values
     - baij->Mvctx: scatter context from the global vector into baij->lvec
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;  /* ec counts distinct ghost block columns */
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;       /* 1-based global -> 1-based local block-column map */
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      /* keys/values are shifted by one because the table cannot hold 0 */
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                 /* undo the one-based shift */
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  /* rebuild the table so that local ids follow the sorted global order */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->cmap->n =baij->B->cmap->N  = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = i; }
  /* stmp is handed off to the IS (PETSC_OWN_POINTER), so it is not freed here */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;          /* ownership of garray passes to the matrix */
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
132 
133 /*
134      Takes the local part of an already assembled MPIBAIJ matrix
135    and disassembles it. This is to allow new nonzeros into the matrix
136    that require more communication in the matrix vector multiply.
137    Thus certain data-structures must be rebuilt.
138 
139    Kind of slow! But that's what application programmers get when
140    they are sloppy.
141 */
#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIBAIJ"
/*
   Rebuilds the off-diagonal block B with global column numbering and full
   global width (n = A->cmap->N), destroying the compacted B and the
   matrix-vector-multiply scatter machinery (lvec, Mvctx, colmap, garray)
   so that new nonzeros requiring more communication can be inserted.
   Marks A as not assembled.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat            B = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
  MatScalar      *a = Bbaij->a;
  MatScalar      *atmp;


  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  /* nz[i] = number of block nonzeros in block row i of the old B,
     used for exact preallocation of Bnew */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
  ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */
  /* values are copied column-oriented (one bs*bs block at a time from Bbaij->a) */
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]];   /* translate local ghost column back to global */
      atmp = a + j*bs2;
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  baij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
205 
/* Ugly stuff added for Glenn; someday we should fix this up. */
207 
208 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
209                                       parts of the local matrix */
210 static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
211 
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
215 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
216 {
217   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
218   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
219   PetscErrorCode ierr;
220   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
221   PetscInt       *r_rmapd,*r_rmapo;
222 
223   PetscFunctionBegin;
224   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
225   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
226   ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
227   ierr = PetscMemzero(r_rmapd,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
228   nt   = 0;
229   for (i=0; i<inA->rmap->bmapping->n; i++) {
230     if (inA->rmap->bmapping->indices[i]*bs >= cstart && inA->rmap->bmapping->indices[i]*bs < cend) {
231       nt++;
232       r_rmapd[i] = inA->rmap->bmapping->indices[i] + 1;
233     }
234   }
235   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
236   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
237   for (i=0; i<inA->rmap->bmapping->n; i++) {
238     if (r_rmapd[i]) {
239       for (j=0; j<bs; j++) {
240         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
241       }
242     }
243   }
244   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
245   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
246 
247   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
248   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
249   for (i=0; i<B->nbs; i++) {
250     lindices[garray[i]] = i+1;
251   }
252   no   = inA->rmap->bmapping->n - nt;
253   ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
254   ierr = PetscMemzero(r_rmapo,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
255   nt   = 0;
256   for (i=0; i<inA->rmap->bmapping->n; i++) {
257     if (lindices[inA->rmap->bmapping->indices[i]]) {
258       nt++;
259       r_rmapo[i] = lindices[inA->rmap->bmapping->indices[i]];
260     }
261   }
262   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
263   ierr = PetscFree(lindices);CHKERRQ(ierr);
264   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
265   for (i=0; i<inA->rmap->bmapping->n; i++) {
266     if (r_rmapo[i]) {
267       for (j=0; j<bs; j++) {
268         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
269       }
270     }
271   }
272   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
273   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
279 PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
280 {
281   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
282   PetscErrorCode ierr;
283 
284   PetscFunctionBegin;
285   ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288 
289 EXTERN_C_BEGIN
290 #undef __FUNCT__
291 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
292 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
293 {
294   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
295   PetscErrorCode ierr;
296   PetscInt       n,i;
297   PetscScalar    *d,*o,*s;
298 
299   PetscFunctionBegin;
300   if (!uglyrmapd) {
301     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
302   }
303 
304   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
305 
306   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
307   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
308   for (i=0; i<n; i++) {
309     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
310   }
311   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
312   /* column scale "diagonal" portion of local matrix */
313   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
314 
315   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
316   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
317   for (i=0; i<n; i++) {
318     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
319   }
320   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
321   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
322   /* column scale "off-diagonal" portion of local matrix */
323   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 EXTERN_C_END
327 
328 
329