xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
8 {
9   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
10   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
11   PetscErrorCode ierr;
12   PetscInt       Nbs = sbaij->Nbs,i,j,*aj = B->j,ec = 0,*garray,*sgarray;
13   PetscInt       bs  = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
14   IS             from,to;
15   Vec            gvec;
16   PetscMPIInt    rank = sbaij->rank,lsize;
17   PetscInt       *owners = sbaij->rangebs,*ec_owner,k;
18   const PetscInt *sowners;
19   PetscScalar    *ptr;
20 #if defined(PETSC_USE_CTABLE)
21   PetscTable         gid1_lid1; /* one-based gid to lid table */
22   PetscTablePosition tpos;
23   PetscInt           gid,lid;
24 #else
25   PetscInt           *indices;
26 #endif
27 
28   PetscFunctionBegin;
29 #if defined(PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(mbs,Nbs+1,&gid1_lid1);CHKERRQ(ierr);
31   for (i=0; i<B->mbs; i++) {
32     for (j=0; j<B->ilen[i]; j++) {
33       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
34       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
35       if (!data) {ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);}
36     }
37   }
38   /* form array of columns we need */
39   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
40   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
41   while (tpos) {
42     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
43     gid--; lid--;
44     garray[lid] = gid;
45   }
46   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
47   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
48   for (i=0; i<ec; i++) {ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);}
49   /* compact out the extra columns in B */
50   for (i=0; i<B->mbs; i++) {
51     for (j=0; j<B->ilen[i]; j++) {
52       PetscInt gid1 = aj[B->i[i] + j] + 1;
53       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
54       lid--;
55       aj[B->i[i]+j] = lid;
56     }
57   }
58   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
59   ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);
60   for (i=j=0; i<ec; i++) {
61     while (garray[i]>=owners[j+1]) j++;
62     ec_owner[i] = j;
63   }
64 #else
65   /* For the first stab we make an array as long as the number of columns */
66   /* mark those columns that are in sbaij->B */
67   ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
68   for (i=0; i<mbs; i++) {
69     for (j=0; j<B->ilen[i]; j++) {
70       if (!indices[aj[B->i[i] + j]]) ec++;
71       indices[aj[B->i[i] + j]] = 1;
72     }
73   }
74 
75   /* form arrays of columns we need */
76   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
77   ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);
78 
79   ec = 0;
80   for (j=0; j<sbaij->size; j++) {
81     for (i=owners[j]; i<owners[j+1]; i++) {
82       if (indices[i]) {
83         garray[ec]   = i;
84         ec_owner[ec] = j;
85         ec++;
86       }
87     }
88   }
89 
90   /* make indices now point into garray */
91   for (i=0; i<ec; i++) indices[garray[i]] = i;
92 
93   /* compact out the extra columns in B */
94   for (i=0; i<mbs; i++) {
95     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
96   }
97   ierr = PetscFree(indices);CHKERRQ(ierr);
98 #endif
99   B->nbs = ec;
100   ierr = PetscLayoutDestroy(&sbaij->B->cmap);CHKERRQ(ierr);
101   ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&sbaij->B->cmap);CHKERRQ(ierr);
102 
103   ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
104   /* create local vector that is used to scatter into */
105   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
106 
107   /* create two temporary index sets for building scatter-gather */
108   ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr);
109   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
110   for (i=0; i<ec; i++) stmp[i] = i;
111   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
112 
113   /* generate the scatter context
114      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
115   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
116   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
117   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
118 
119   sbaij->garray = garray;
120 
121   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr);
122   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr);
123   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
124   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
125 
126   ierr = ISDestroy(&from);CHKERRQ(ierr);
127   ierr = ISDestroy(&to);CHKERRQ(ierr);
128 
129   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
130   lsize = (mbs + ec)*bs;
131   ierr  = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
132   ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
133   ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
134 
135   ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr);
136 
137   /* x index in the IS sfrom */
138   for (i=0; i<ec; i++) {
139     j          = ec_owner[i];
140     sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
141   }
142   /* b index in the IS sfrom */
143   k = sowners[rank]/bs + mbs;
144   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
145   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
146 
147   /* x index in the IS sto */
148   k = sowners[rank]/bs + mbs;
149   for (i=0; i<ec; i++) stmp[i] = (k + i);
150   /* b index in the IS sto */
151   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
152 
153   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
154 
155   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
156   ierr = VecScatterViewFromOptions(sbaij->sMvctx,(PetscObject)mat,"-matmult_vecscatter_view");CHKERRQ(ierr);
157 
158   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
159   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
160   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
161   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
162   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
163 
164   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
165   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
166   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
167 
168   ierr = PetscFree(stmp);CHKERRQ(ierr);
169 
170   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr);
171   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr);
172   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr);
173   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr);
174   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr);
175   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr);
176   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
177   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
178 
179   ierr = PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt));CHKERRQ(ierr);
180   ierr = ISDestroy(&from);CHKERRQ(ierr);
181   ierr = ISDestroy(&to);CHKERRQ(ierr);
182   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 /*
187      Takes the local part of an already assembled MPISBAIJ matrix
188    and disassembles it. This is to allow new nonzeros into the matrix
189    that require more communication in the matrix vector multiply.
190    Thus certain data-structures must be rebuilt.
191 
192    Kind of slow! But that's what application programmers get when
193    they are sloppy.
194 */
195 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
196 {
197   Mat_MPISBAIJ   *baij  = (Mat_MPISBAIJ*)A->data;
198   Mat            B      = baij->B,Bnew;
199   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
200   PetscErrorCode ierr;
201   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
202   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
203   MatScalar      *a = Bbaij->a;
204   PetscScalar    *atmp;
205 #if defined(PETSC_USE_REAL_MAT_SINGLE)
206   PetscInt l;
207 #endif
208 
209   PetscFunctionBegin;
210 #if defined(PETSC_USE_REAL_MAT_SINGLE)
211   ierr = PetscMalloc1(A->rmap->bs,&atmp);CHKERRQ(ierr);
212 #endif
213   /* free stuff related to matrix-vec multiply */
214   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
215   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
216   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
217 
218   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
219   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
220   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
221   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
222   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
223 
224   if (baij->colmap) {
225 #if defined(PETSC_USE_CTABLE)
226     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
227 #else
228     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
229     ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
230 #endif
231   }
232 
233   /* make sure that B is assembled so we can access its values */
234   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
235   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
236 
237   /* invent new B and copy stuff over */
238   ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
239   for (i=0; i<mbs; i++) {
240     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
241   }
242   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
243   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
244   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
245   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
246   ierr = PetscFree(nz);CHKERRQ(ierr);
247 
248   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
249     ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
250   }
251 
252   /*
253    Ensure that B's nonzerostate is monotonically increasing.
254    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
255    */
256   Bnew->nonzerostate = B->nonzerostate;
257   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
258   for (i=0; i<mbs; i++) {
259     rvals[0] = bs*i;
260     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
261     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
262       col = garray[Bbaij->j[j]]*bs;
263       for (k=0; k<bs; k++) {
264 #if defined(PETSC_USE_REAL_MAT_SINGLE)
265         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
266 #else
267         atmp = a+j*bs2 + k*bs;
268 #endif
269         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
270         col++;
271       }
272     }
273   }
274 #if defined(PETSC_USE_REAL_MAT_SINGLE)
275   ierr = PetscFree(atmp);CHKERRQ(ierr);
276 #endif
277   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
278 
279   baij->garray = NULL;
280 
281   ierr = PetscFree(rvals);CHKERRQ(ierr);
282   ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
283   ierr = MatDestroy(&B);CHKERRQ(ierr);
284   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);
285 
286   baij->B = Bnew;
287 
288   A->was_assembled = PETSC_FALSE;
289   PetscFunctionReturn(0);
290 }
291