xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 22612f2f7cceb60caedd65384cdf99fc989f2aeb)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 
8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
9 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication structures needed for
   the parallel matrix-vector product of an MPIBAIJ matrix:

     - determines the set of global block columns of the off-diagonal part B
       that are actually referenced (the "ghost" columns), stored sorted in
       baij->garray (length ec)
     - renumbers B's column indices in place into the compact local
       numbering 0..ec-1
     - creates the sequential work vector baij->lvec (length ec*bs) and the
       scatter context baij->Mvctx used to gather the required off-process
       vector entries before the local multiply

   Two variants are compiled: a PetscTable-based one whose memory scales with
   the number of ghost columns, and an array-based one whose memory scales
   with the total number of global block columns Nbs.
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap.bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* Pass 1: enter each distinct global block column (1-based key) into the
     table, assigning provisional local ids 1,2,...; ec counts distinct cols */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--; /* back to zero-based ids */
    garray[lid] = gid;
  }
  /* sort so the local numbering is monotone in the global numbering, then
     rebuild the table with the final (sorted) local ids */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  /* Pass 2: rewrite B's global block-column indices as local ids 0..ec-1 */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  /* shrink B's logical column space to just the ec referenced block columns */
  B->nbs     = ec;
  baij->B->cmap.n = baij->B->cmap.N = ec*mat->rmap.bs;
  ierr = PetscMapSetUp(&(baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  /* garray comes out sorted automatically since we sweep i = 0..Nbs-1 */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  /* shrink B's logical column space to just the ec referenced block columns */
  B->nbs       = ec;
  baij->B->cmap.n =baij->B->cmap.N  = ec*mat->rmap.bs;
  ierr = PetscMapSetUp(&(baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* "from" = the needed global point indices (block starts); garray is
     temporarily scaled by bs and restored right after */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  /* "to" = contiguous local positions 0,bs,2*bs,... in lvec */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  /* garray ownership passes to the matrix; freed in DisAssemble/destroy */
  baij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
145 
/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIBAIJ"
/*
   DisAssemble_MPIBAIJ - Replaces the compacted off-diagonal block B (whose
   column indices are local, see MatSetUpMultiply_MPIBAIJ) with a fresh B
   whose columns are back in the global numbering, and tears down the
   scatter machinery (lvec, Mvctx, colmap, garray) so it can be rebuilt
   after new nonzeros are inserted.
*/
PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat            B = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap.n,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap.n;
  MatScalar      *a = Bbaij->a;
  PetscScalar    *atmp;
#if defined(PETSC_USE_MAT_SINGLE)
  PetscInt       k;
#endif

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    baij->colmap = 0;
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  /* preallocate the new B with exactly the current per-block-row counts */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(B->comm,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap.bs,0,nz);CHKERRQ(ierr);
  /* old B stores blocks column-oriented; tell Bnew to accept them as-is */
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  /* MatScalar is single precision here: stage each bs2 block into a
     PetscScalar buffer before insertion */
  ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
#endif
    for (i=0; i<mbs; i++) {
      for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
        col  = garray[Bbaij->j[j]]; /* local block column -> global block column */
#if defined(PETSC_USE_MAT_SINGLE)
        for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
#else
        atmp = a + j*bs2;
#endif
        ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
      }
    }
    ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(atmp);CHKERRQ(ierr);
#endif

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  baij->garray = 0;
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  baij->B = Bnew;
  /* force the assembly path to redo MatSetUpMultiply on next assembly */
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
231 
/*      ugly stuff added for Glenn someday we should fix this up */

static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
/* NOTE(review): these are file-scope statics, so the cached setup can only
   describe ONE matrix per process at a time, and is never freed — confirm no
   caller uses MatDiagonalScaleLocal on two different MPIBAIJ matrices. */
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
241 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
242 {
243   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
244   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
245   PetscErrorCode ierr;
246   PetscInt       bs = inA->rmap.bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
247   PetscInt       *r_rmapd,*r_rmapo;
248 
249   PetscFunctionBegin;
250   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
251   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
252   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
253   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
254   nt   = 0;
255   for (i=0; i<inA->bmapping->n; i++) {
256     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
257       nt++;
258       r_rmapd[i] = inA->bmapping->indices[i] + 1;
259     }
260   }
261   if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
262   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
263   for (i=0; i<inA->bmapping->n; i++) {
264     if (r_rmapd[i]){
265       for (j=0; j<bs; j++) {
266         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
267       }
268     }
269   }
270   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
271   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
272 
273   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
274   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
275   for (i=0; i<B->nbs; i++) {
276     lindices[garray[i]] = i+1;
277   }
278   no   = inA->bmapping->n - nt;
279   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
280   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
281   nt   = 0;
282   for (i=0; i<inA->bmapping->n; i++) {
283     if (lindices[inA->bmapping->indices[i]]) {
284       nt++;
285       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
286     }
287   }
288   if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
289   ierr = PetscFree(lindices);CHKERRQ(ierr);
290   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
291   for (i=0; i<inA->bmapping->n; i++) {
292     if (r_rmapo[i]){
293       for (j=0; j<bs; j++) {
294         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
295       }
296     }
297   }
298   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
299   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
300 
301   PetscFunctionReturn(0);
302 }
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
306 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
307 {
308   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
309   PetscErrorCode ierr,(*f)(Mat,Vec);
310 
311   PetscFunctionBegin;
312   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
313   if (f) {
314     ierr = (*f)(A,scale);CHKERRQ(ierr);
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 EXTERN_C_BEGIN
320 #undef __FUNCT__
321 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
322 PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
323 {
324   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
325   PetscErrorCode ierr;
326   PetscInt       n,i;
327   PetscScalar    *d,*o,*s;
328 
329   PetscFunctionBegin;
330   if (!uglyrmapd) {
331     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
332   }
333 
334   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
335 
336   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
337   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
338   for (i=0; i<n; i++) {
339     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
340   }
341   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
342   /* column scale "diagonal" portion of local matrix */
343   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
344 
345   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
346   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
347   for (i=0; i<n; i++) {
348     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
349   }
350   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
351   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
352   /* column scale "off-diagonal" portion of local matrix */
353   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
354 
355   PetscFunctionReturn(0);
356 }
357 EXTERN_C_END
358 
359 
360