xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 94ef8dde638caef1d0cd84a7dc8a2db65fcda8b6)
1 
2 /*
3    Support for the parallel BAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/baij/mpi/mpibaij.h>
6 #include <petsc/private/isimpl.h>    /* needed because accesses data structure of ISLocalToGlobalMapping directly */
7 
8 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
9 {
10   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
11   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
12   PetscErrorCode ierr;
13   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
14   PetscInt       bs = mat->rmap->bs,*stmp;
15   IS             from,to;
16   Vec            gvec;
17 #if defined(PETSC_USE_CTABLE)
18   PetscTable         gid1_lid1;
19   PetscTablePosition tpos;
20   PetscInt           gid,lid;
21 #else
22   PetscInt Nbs = baij->Nbs,*indices;
23 #endif
24 
25   PetscFunctionBegin;
26 #if defined(PETSC_USE_CTABLE)
27   /* use a table - Mark Adams */
28   ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
29   for (i=0; i<B->mbs; i++) {
30     for (j=0; j<B->ilen[i]; j++) {
31       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
32       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
33       if (!data) {
34         /* one based table */
35         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
36       }
37     }
38   }
39   /* form array of columns we need */
40   ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
41   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
42   while (tpos) {
43     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
44     gid--; lid--;
45     garray[lid] = gid;
46   }
47   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
48   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
49   for (i=0; i<ec; i++) {
50     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
51   }
52   /* compact out the extra columns in B */
53   for (i=0; i<B->mbs; i++) {
54     for (j=0; j<B->ilen[i]; j++) {
55       PetscInt gid1 = aj[B->i[i] + j] + 1;
56       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
57       lid--;
58       aj[B->i[i]+j] = lid;
59     }
60   }
61   B->nbs           = ec;
62   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
63 
64   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
65   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
66 #else
67   /* Make an array as long as the number of columns */
68   /* mark those columns that are in baij->B */
69   ierr = PetscCalloc1(Nbs+1,&indices);CHKERRQ(ierr);
70   for (i=0; i<B->mbs; i++) {
71     for (j=0; j<B->ilen[i]; j++) {
72       if (!indices[aj[B->i[i] + j]]) ec++;
73       indices[aj[B->i[i] + j]] = 1;
74     }
75   }
76 
77   /* form array of columns we need */
78   ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
79   ec   = 0;
80   for (i=0; i<Nbs; i++) {
81     if (indices[i]) {
82       garray[ec++] = i;
83     }
84   }
85 
86   /* make indices now point into garray */
87   for (i=0; i<ec; i++) {
88     indices[garray[i]] = i;
89   }
90 
91   /* compact out the extra columns in B */
92   for (i=0; i<B->mbs; i++) {
93     for (j=0; j<B->ilen[i]; j++) {
94       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
95     }
96   }
97   B->nbs           = ec;
98   baij->B->cmap->n = baij->B->cmap->N  = ec*mat->rmap->bs;
99 
100   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
101   ierr = PetscFree(indices);CHKERRQ(ierr);
102 #endif
103 
104   /* create local vector that is used to scatter into */
105   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
106 
107   /* create two temporary index sets for building scatter-gather */
108   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
109 
110   ierr = PetscMalloc1(ec+1,&stmp);CHKERRQ(ierr);
111   for (i=0; i<ec; i++) stmp[i] = i;
112   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
113 
114   /* create temporary global vector to generate scatter context */
115   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
116 
117   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
118 
119   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
120   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
121   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
122   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
123 
124   baij->garray = garray;
125 
126   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
127   ierr = ISDestroy(&from);CHKERRQ(ierr);
128   ierr = ISDestroy(&to);CHKERRQ(ierr);
129   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 /*
134      Takes the local part of an already assembled MPIBAIJ matrix
135    and disassembles it. This is to allow new nonzeros into the matrix
136    that require more communication in the matrix vector multiply.
137    Thus certain data-structures must be rebuilt.
138 
139    Kind of slow! But that's what application programmers get when
140    they are sloppy.
141 */
142 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
143 {
144   Mat_MPIBAIJ    *baij  = (Mat_MPIBAIJ*)A->data;
145   Mat            B      = baij->B,Bnew;
146   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
149   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
150   MatScalar      *a  = Bbaij->a;
151   MatScalar      *atmp;
152 
153 
154   PetscFunctionBegin;
155   /* free stuff related to matrix-vec multiply */
156   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
157   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
158   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
159   if (baij->colmap) {
160 #if defined(PETSC_USE_CTABLE)
161     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
162 #else
163     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
164     ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
165 #endif
166   }
167 
168   /* make sure that B is assembled so we can access its values */
169   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171 
172   /* invent new B and copy stuff over */
173   ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
174   for (i=0; i<mbs; i++) {
175     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
176   }
177   ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr);
178   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
179   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
180   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
181 
182   ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */
183 
184   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
185   /*
186    Ensure that B's nonzerostate is monotonically increasing.
187    Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
188    */
189   Bnew->nonzerostate = B->nonzerostate;
190 
191   for (i=0; i<mbs; i++) {
192     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
193       col  = garray[Bbaij->j[j]];
194       atmp = a + j*bs2;
195       ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
196     }
197   }
198   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
199 
200   ierr = PetscFree(nz);CHKERRQ(ierr);
201   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
202   ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
203   ierr = MatDestroy(&B);CHKERRQ(ierr);
204   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);
205 
206   baij->B          = Bnew;
207   A->was_assembled = PETSC_FALSE;
208   A->assembled     = PETSC_FALSE;
209   PetscFunctionReturn(0);
210 }
211 
212 /*      ugly stuff added for Glenn someday we should fix this up */
213 
214 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
215 static Vec      uglydd     = 0,uglyoo     = 0;  /* work vectors used to scale the two parts of the local matrix */
216 
217 
218 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
219 {
220   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
221   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)ina->B->data;
222   PetscErrorCode ierr;
223   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
224   PetscInt       *r_rmapd,*r_rmapo;
225 
226   PetscFunctionBegin;
227   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
228   ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
229   ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr);
230   nt   = 0;
231   for (i=0; i<inA->rmap->mapping->n; i++) {
232     if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) {
233       nt++;
234       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
235     }
236   }
237   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
238   ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr);
239   for (i=0; i<inA->rmap->mapping->n; i++) {
240     if (r_rmapd[i]) {
241       for (j=0; j<bs; j++) {
242         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
243       }
244     }
245   }
246   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
247   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
248 
249   ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr);
250   for (i=0; i<B->nbs; i++) {
251     lindices[garray[i]] = i+1;
252   }
253   no   = inA->rmap->mapping->n - nt;
254   ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr);
255   nt   = 0;
256   for (i=0; i<inA->rmap->mapping->n; i++) {
257     if (lindices[inA->rmap->mapping->indices[i]]) {
258       nt++;
259       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
260     }
261   }
262   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
263   ierr = PetscFree(lindices);CHKERRQ(ierr);
264   ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr);
265   for (i=0; i<inA->rmap->mapping->n; i++) {
266     if (r_rmapo[i]) {
267       for (j=0; j<bs; j++) {
268         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
269       }
270     }
271   }
272   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
273   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
278 {
279   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
280   PetscErrorCode ierr;
281 
282   PetscFunctionBegin;
283   ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
288 {
289   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
290   PetscErrorCode ierr;
291   PetscInt       n,i;
292   PetscScalar    *d,*o,*s;
293 
294   PetscFunctionBegin;
295   if (!uglyrmapd) {
296     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
297   }
298 
299   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
300 
301   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
302   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
303   for (i=0; i<n; i++) {
304     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
305   }
306   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
307   /* column scale "diagonal" portion of local matrix */
308   ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr);
309 
310   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
311   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
312   for (i=0; i<n; i++) {
313     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
314   }
315   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
316   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
317   /* column scale "off-diagonal" portion of local matrix */
318   ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321