xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision e2df7a95c5ea77c899beea10ff9effd6061e7c8f)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 
8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
9 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Sets up the machinery for the parallel block
   matrix-vector product: compacts the column space of the off-diagonal part
   B, records the compacted-to-global block column map (baij->garray), and
   creates the local work vector (baij->lvec) and the scatter context
   (baij->Mvctx) that gathers the required off-process vector entries.

   Input Parameter:
.  mat - an assembled MPIBAIJ matrix

   Note: two code paths exist; with PETSC_USE_CTABLE a hash table replaces
   the O(Nbs) marker array used to find the distinct columns.
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* count the distinct global block columns of B; table keys/values are 1-based */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                      /* convert back from 1-based table entries */
    garray[lid] = gid;
  }
  /* sort, then rebuild the table so local ids follow the global ordering */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: replace each global block column
     index in aj by its local (compacted) counterpart */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*mat->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->n   = ec*mat->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* "from" is indexed by the point (not block) global columns we need */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;          /* restore block indices in garray */
  }

  /* "to" is simply 0,bs,2*bs,... in the local work vector */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;               /* kept; used later e.g. by DisAssemble_MPIBAIJ */
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
152 
153 /*
154      Takes the local part of an already assembled MPIBAIJ matrix
155    and disassembles it. This is to allow new nonzeros into the matrix
156    that require more communication in the matrix vector multiply.
157    Thus certain data-structures must be rebuilt.
158 
159    Kind of slow! But that's what application programmers get when
160    they are sloppy.
161 */
162 #undef __FUNCT__
163 #define __FUNCT__ "DisAssemble_MPIBAIJ"
164 PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
165 {
166   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
167   Mat            B = baij->B,Bnew;
168   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
169   PetscErrorCode ierr;
170   PetscInt       i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
171   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->m;
172   MatScalar      *a = Bbaij->a;
173   PetscScalar    *atmp;
174 #if defined(PETSC_USE_MAT_SINGLE)
175   PetscInt       k;
176 #endif
177 
178   PetscFunctionBegin;
179   /* free stuff related to matrix-vec multiply */
180   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
181   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
182   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
183   if (baij->colmap) {
184 #if defined (PETSC_USE_CTABLE)
185     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
186 #else
187     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
188     baij->colmap = 0;
189     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
190 #endif
191   }
192 
193   /* make sure that B is assembled so we can access its values */
194   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
195   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
196 
197   /* invent new B and copy stuff over */
198   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
199   for (i=0; i<mbs; i++) {
200     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
201   }
202   ierr = MatCreate(B->comm,&Bnew);CHKERRQ(ierr);
203   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
204   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
205   ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr);
206   ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
207 
208 #if defined(PETSC_USE_MAT_SINGLE)
209   ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
210 #endif
211     for (i=0; i<mbs; i++) {
212       for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
213         col  = garray[Bbaij->j[j]];
214 #if defined(PETSC_USE_MAT_SINGLE)
215         for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
216 #else
217         atmp = a + j*bs2;
218 #endif
219         ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
220       }
221     }
222   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);
223 
224 #if defined(PETSC_USE_MAT_SINGLE)
225   ierr = PetscFree(atmp);CHKERRQ(ierr);
226 #endif
227 
228   ierr = PetscFree(nz);CHKERRQ(ierr);
229   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
230   baij->garray = 0;
231   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
232   ierr = MatDestroy(B);CHKERRQ(ierr);
233   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
234   baij->B = Bnew;
235   A->was_assembled = PETSC_FALSE;
236   PetscFunctionReturn(0);
237 }
238 
/*      ugly stuff added for Glenn someday we should fix this up */

static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
/* NOTE(review): file-scope statics are shared by ALL MPIBAIJ matrices in the
   process and are never freed or reset; looks intentional for the one-matrix
   use case this was written for -- confirm before reusing with multiple matrices */
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
248 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
249 {
250   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
251   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
252   PetscErrorCode ierr;
253   PetscInt       bs = inA->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
254   PetscInt       *r_rmapd,*r_rmapo;
255 
256   PetscFunctionBegin;
257   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
258   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
259   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
260   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
261   nt   = 0;
262   for (i=0; i<inA->bmapping->n; i++) {
263     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
264       nt++;
265       r_rmapd[i] = inA->bmapping->indices[i] + 1;
266     }
267   }
268   if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
269   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
270   for (i=0; i<inA->bmapping->n; i++) {
271     if (r_rmapd[i]){
272       for (j=0; j<bs; j++) {
273         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
274       }
275     }
276   }
277   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
278   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
279 
280   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
281   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
282   for (i=0; i<B->nbs; i++) {
283     lindices[garray[i]] = i+1;
284   }
285   no   = inA->bmapping->n - nt;
286   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
287   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
288   nt   = 0;
289   for (i=0; i<inA->bmapping->n; i++) {
290     if (lindices[inA->bmapping->indices[i]]) {
291       nt++;
292       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
293     }
294   }
295   if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
296   ierr = PetscFree(lindices);CHKERRQ(ierr);
297   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
298   for (i=0; i<inA->bmapping->n; i++) {
299     if (r_rmapo[i]){
300       for (j=0; j<bs; j++) {
301         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
302       }
303     }
304   }
305   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
306   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
307 
308   PetscFunctionReturn(0);
309 }
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
313 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
314 {
315   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
316   PetscErrorCode ierr,(*f)(Mat,Vec);
317 
318   PetscFunctionBegin;
319   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
320   if (f) {
321     ierr = (*f)(A,scale);CHKERRQ(ierr);
322   }
323   PetscFunctionReturn(0);
324 }
325 
326 EXTERN_C_BEGIN
327 #undef __FUNCT__
328 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
329 PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
330 {
331   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
332   PetscErrorCode ierr;
333   PetscInt       n,i;
334   PetscScalar    *d,*o,*s;
335 
336   PetscFunctionBegin;
337   if (!uglyrmapd) {
338     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
339   }
340 
341   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
342 
343   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
344   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
345   for (i=0; i<n; i++) {
346     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
347   }
348   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
349   /* column scale "diagonal" portion of local matrix */
350   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
351 
352   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
353   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
354   for (i=0; i<n; i++) {
355     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
356   }
357   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
358   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
359   /* column scale "off-diagonal" portion of local matrix */
360   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
361 
362   PetscFunctionReturn(0);
363 }
364 EXTERN_C_END
365 
366 
367