xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision a7fac7c2b47dbcd8ece0cc2dfdfe0e63be1bb7b5)
1 
2 /*
3    Support for the parallel BAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/baij/mpi/mpibaij.h>
6 #include <petsc/private/isimpl.h>    /* needed because accesses data structure of ISLocalToGlobalMapping directly */
7 
8 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
9 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication structures needed by
   the parallel matrix-vector product.

   After assembly the off-diagonal part baij->B holds global block-column
   indices.  This routine
     1) collects the ec distinct global block columns that actually occur in B,
     2) renumbers B's column indices to the compact local range 0..ec-1,
        recording garray[] as the local -> global block-column map,
     3) creates the sequential work vector baij->lvec (length ec*bs points)
        and the VecScatter baij->Mvctx that gathers the needed off-process
        vector entries into it.

   Input Parameter:
.  mat - the MPIBAIJ matrix; its B part must already contain its final
         nonzero pattern
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* The table maps gid+1 -> lid+1; the +1 shift lets a lookup result of 0
     mean "key not present" */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  /* table iteration order is arbitrary: sort garray and rebuild the table so
     that local ids follow increasing global block-column order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: rewrite aj[] in local numbering */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  /* shrink B's (block) column dimension to the compacted width */
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscCalloc1(Nbs+1,&indices);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need; scanning indices[] in order yields a
     sorted garray automatically */
  ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B: rewrite aj[] in local numbering */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  /* shrink B's (block) column dimension to the compacted width */
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N  = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather:
     "from" = needed global block columns, "to" = 0..ec-1 in lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc1(ec+1,&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context; the NULL
     array means it carries no data -- only its parallel layout is used */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  /* ownership of garray passes to the matrix (freed in MatDisAssemble/Destroy) */
  baij->garray = garray;

  ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
136 
137 /*
138      Takes the local part of an already assembled MPIBAIJ matrix
139    and disassembles it. This is to allow new nonzeros into the matrix
140    that require more communication in the matrix vector multiply.
141    Thus certain data-structures must be rebuilt.
142 
143    Kind of slow! But that's what application programmers get when
144    they are sloppy.
145 */
#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIBAIJ"
/*
   MatDisAssemble_MPIBAIJ - Undoes what MatSetUpMultiply_MPIBAIJ built:
   destroys the scatter context, the local work vector, and the colmap, and
   replaces the compactly-numbered off-diagonal matrix B with a fresh Bnew
   whose column indices are global again (mapped back through garray).
   Afterwards the matrix is marked unassembled so new nonzeros can be set.

   Input Parameter:
.  A - the MPIBAIJ matrix to disassemble
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij  = (Mat_MPIBAIJ*)A->data;
  Mat            B      = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
  MatScalar      *a  = Bbaij->a;
  MatScalar      *atmp;


  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over; nz[i] = block-nonzeros in row i of the
     old B, used as exact preallocation for Bnew */
  ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);

  ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */

  /* column-oriented insertion; atmp below points at the raw bs2 block as
     stored in the SeqBAIJ arrays -- presumably column-major; NOTE(review):
     confirm against the SeqBAIJ storage format */
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;

  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]];   /* local block column -> global block column */
      atmp = a + j*bs2;
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);

  baij->B          = Bnew;
  /* force a full reassembly (and a new MatSetUpMultiply) later on */
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
217 
/*      ugly stuff added for Glenn someday we should fix this up */

/* NOTE(review): these are process-wide file-scope statics shared by ALL
   MPIBAIJ matrices that use MatMPIBAIJDiagonalScaleLocal; they are built once
   by MatMPIBAIJDiagonalScaleLocalSetUp and (from what is visible here) never
   freed or rebuilt for a second matrix -- not thread-safe; confirm intended
   lifetime */
static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      uglydd     = 0,uglyoo     = 0;  /* work vectors used to scale the two parts of the local matrix */
223 
224 #undef __FUNCT__
225 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
226 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
227 {
228   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
229   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)ina->B->data;
230   PetscErrorCode ierr;
231   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
232   PetscInt       *r_rmapd,*r_rmapo;
233 
234   PetscFunctionBegin;
235   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
236   ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
237   ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr);
238   nt   = 0;
239   for (i=0; i<inA->rmap->mapping->n; i++) {
240     if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) {
241       nt++;
242       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
243     }
244   }
245   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
246   ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr);
247   for (i=0; i<inA->rmap->mapping->n; i++) {
248     if (r_rmapd[i]) {
249       for (j=0; j<bs; j++) {
250         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
251       }
252     }
253   }
254   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
255   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
256 
257   ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr);
258   for (i=0; i<B->nbs; i++) {
259     lindices[garray[i]] = i+1;
260   }
261   no   = inA->rmap->mapping->n - nt;
262   ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr);
263   nt   = 0;
264   for (i=0; i<inA->rmap->mapping->n; i++) {
265     if (lindices[inA->rmap->mapping->indices[i]]) {
266       nt++;
267       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
268     }
269   }
270   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
271   ierr = PetscFree(lindices);CHKERRQ(ierr);
272   ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr);
273   for (i=0; i<inA->rmap->mapping->n; i++) {
274     if (r_rmapo[i]) {
275       for (j=0; j<bs; j++) {
276         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
277       }
278     }
279   }
280   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
281   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
282   PetscFunctionReturn(0);
283 }
284 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
/*
   MatMPIBAIJDiagonalScaleLocal - Public entry point that dispatches to the
   matrix's "MatDiagonalScaleLocal_C" method (MatDiagonalScaleLocal_MPIBAIJ
   for MPIBAIJ); a no-op if the method is not composed on the matrix.

   Input Parameters:
+  A     - the matrix
-  scale - the locally-ordered scaling vector
*/
PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
296 
297 #undef __FUNCT__
298 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
299 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
300 {
301   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
302   PetscErrorCode ierr;
303   PetscInt       n,i;
304   PetscScalar    *d,*o,*s;
305 
306   PetscFunctionBegin;
307   if (!uglyrmapd) {
308     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
309   }
310 
311   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
312 
313   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
314   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
315   for (i=0; i<n; i++) {
316     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
317   }
318   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
319   /* column scale "diagonal" portion of local matrix */
320   ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr);
321 
322   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
323   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
324   for (i=0; i<n; i++) {
325     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
326   }
327   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
328   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
329   /* column scale "off-diagonal" portion of local matrix */
330   ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr);
331   PetscFunctionReturn(0);
332 }
333