xref: /petsc/src/mat/impls/aij/mpi/mmaij.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 /*
3    Support for the parallel AIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/aij/mpi/mpiaij.h>
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
9 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
12   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)(aij->B->data);
13   PetscErrorCode ierr;
14   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
15   IS             from,to;
16   Vec            gvec;
17   PetscBool      useblockis;
18 #if defined(PETSC_USE_CTABLE)
19   PetscTable         gid1_lid1;
20   PetscTablePosition tpos;
21   PetscInt           gid,lid;
22 #else
23   PetscInt N = mat->cmap->N,*indices;
24 #endif
25 
26   PetscFunctionBegin;
27 #if defined(PETSC_USE_CTABLE)
28   /* use a table */
29   ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
30   for (i=0; i<aij->B->rmap->n; i++) {
31     for (j=0; j<B->ilen[i]; j++) {
32       PetscInt data,gid1 = aj[B->i[i] + j] + 1;
33       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
34       if (!data) {
35         /* one based table */
36         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
37       }
38     }
39   }
40   /* form array of columns we need */
41   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
42   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
43   while (tpos) {
44     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
45     gid--;
46     lid--;
47     garray[lid] = gid;
48   }
49   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
50   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
51   for (i=0; i<ec; i++) {
52     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
53   }
54   /* compact out the extra columns in B */
55   for (i=0; i<aij->B->rmap->n; i++) {
56     for (j=0; j<B->ilen[i]; j++) {
57       PetscInt gid1 = aj[B->i[i] + j] + 1;
58       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
59       lid--;
60       aj[B->i[i] + j] = lid;
61     }
62   }
63   aij->B->cmap->n = aij->B->cmap->N = ec;
64 
65   ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
66   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
67 #else
68   /* Make an array as long as the number of columns */
69   /* mark those columns that are in aij->B */
70   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
71   ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
72   for (i=0; i<aij->B->rmap->n; i++) {
73     for (j=0; j<B->ilen[i]; j++) {
74       if (!indices[aj[B->i[i] + j]]) ec++;
75       indices[aj[B->i[i] + j]] = 1;
76     }
77   }
78 
79   /* form array of columns we need */
80   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
81   ec   = 0;
82   for (i=0; i<N; i++) {
83     if (indices[i]) garray[ec++] = i;
84   }
85 
86   /* make indices now point into garray */
87   for (i=0; i<ec; i++) {
88     indices[garray[i]] = i;
89   }
90 
91   /* compact out the extra columns in B */
92   for (i=0; i<aij->B->rmap->n; i++) {
93     for (j=0; j<B->ilen[i]; j++) {
94       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
95     }
96   }
97   aij->B->cmap->n = aij->B->cmap->N = ec;
98 
99   ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
100   ierr = PetscFree(indices);CHKERRQ(ierr);
101 #endif
102   /* create local vector that is used to scatter into */
103   ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);
104 
105   /* create two temporary Index sets for build scatter gather */
106   /*  check for the special case where blocks are communicated for faster VecScatterXXX */
107   useblockis = PETSC_FALSE;
108   if (mat->cmap->bs > 1) {
109     PetscInt bs = mat->cmap->bs,ibs,ga;
110     if (!(ec % bs)) {
111       useblockis = PETSC_TRUE;
112       for (i=0; i<ec/bs; i++) {
113         if ((ga = garray[ibs = i*bs]) % bs) {
114           useblockis = PETSC_FALSE;
115           break;
116         }
117         for (j=1; j<bs; j++) {
118           if (garray[ibs+j] != ga+j) {
119             useblockis = PETSC_FALSE;
120             break;
121           }
122         }
123         if (!useblockis) break;
124       }
125     }
126   }
127 #if defined(PETSC_USE_DEBUG)
128   i    = (PetscInt)useblockis;
129   ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,((PetscObject)mat)->comm);CHKERRQ(ierr);
130   if (j!=i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of blocked not consistant (I am usning blocked)");
131 #endif
132 
133   if (useblockis) {
134     PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs;
135     if (ec%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs);
136     ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
137     ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
138     for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
139     ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
140   } else {
141     ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
142   }
143 
144   ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);
145 
146   /* create temporary global vector to generate scatter context */
147   /* This does not allocate the array's memory so is efficient */
148   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
149 
150   /* generate the scatter context */
151   ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
152   ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
153   ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
154   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
155   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
156 
157   aij->garray = garray;
158 
159   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
160   ierr = ISDestroy(&from);CHKERRQ(ierr);
161   ierr = ISDestroy(&to);CHKERRQ(ierr);
162   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 
#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIAIJ"
/*
     Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.

   Concretely: the off-diagonal block B (stored with compacted local column
   numbers) is replaced by a new sequential matrix Bnew whose column indices
   are the original global columns (recovered via aij->garray), and the
   scatter machinery (lvec, Mvctx, colmap, garray) built by
   MatSetUpMultiply_MPIAIJ() is torn down so it can be rebuilt later.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)A->data;
  Mat            B     = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  /* nz[i] = number of nonzeros in row i of the old B, used to preallocate Bnew exactly */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  /* Bnew has the full global column width n, since its columns are no longer compacted */
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);

  ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */

  ierr = PetscFree(nz);CHKERRQ(ierr);
  /* ct runs over all stored entries of B in CSR order; garray maps each compacted
     local column number back to its global column */
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE; /* forces MatSetUpMultiply to run again at next assembly */
  PetscFunctionReturn(0);
}
235 
/*      ugly stuff added for Glenn; someday we should fix this up */
237 
/* File-static state shared by MatMPIAIJDiagonalScaleLocalSetUp()/MatDiagonalScaleLocal_MPIAIJ();
   built lazily on first use and never freed (leaks at finalize). */
static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec auglydd          = 0,auglyoo     = 0; /* work vectors used to scale the two parts of the local matrix */
240 
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
244 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
245 {
246   Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */
247   PetscErrorCode ierr;
248   PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
249   PetscInt       *r_rmapd,*r_rmapo;
250 
251   PetscFunctionBegin;
252   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
253   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
254   ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
255   ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
256   nt   = 0;
257   for (i=0; i<inA->rmap->mapping->n; i++) {
258     if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
259       nt++;
260       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
261     }
262   }
263   if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
264   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
265   for (i=0; i<inA->rmap->mapping->n; i++) {
266     if (r_rmapd[i]) {
267       auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
268     }
269   }
270   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
271   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);
272 
273   ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
274   ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
275   for (i=0; i<ina->B->cmap->n; i++) {
276     lindices[garray[i]] = i+1;
277   }
278   no   = inA->rmap->mapping->n - nt;
279   ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
280   ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
281   nt   = 0;
282   for (i=0; i<inA->rmap->mapping->n; i++) {
283     if (lindices[inA->rmap->mapping->indices[i]]) {
284       nt++;
285       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
286     }
287   }
288   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
289   ierr = PetscFree(lindices);CHKERRQ(ierr);
290   ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
291   for (i=0; i<inA->rmap->mapping->n; i++) {
292     if (r_rmapo[i]) {
293       auglyrmapo[(r_rmapo[i]-1)] = i;
294     }
295   }
296   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
297   ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
303 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
304 {
305   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
306   PetscErrorCode ierr;
307 
308   PetscFunctionBegin;
309   ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
310   PetscFunctionReturn(0);
311 }
312 
313 EXTERN_C_BEGIN
314 #undef __FUNCT__
315 #define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
316 PetscErrorCode  MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
317 {
318   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */
319   PetscErrorCode ierr;
320   PetscInt       n,i;
321   PetscScalar    *d,*o,*s;
322 
323   PetscFunctionBegin;
324   if (!auglyrmapd) {
325     ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
326   }
327 
328   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
329 
330   ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
331   ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
332   for (i=0; i<n; i++) {
333     d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
334   }
335   ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
336   /* column scale "diagonal" portion of local matrix */
337   ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr);
338 
339   ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
340   ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
341   for (i=0; i<n; i++) {
342     o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
343   }
344   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
345   ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
346   /* column scale "off-diagonal" portion of local matrix */
347   ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 EXTERN_C_END
351 
352 
353