xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision f2ed2dc71a2ab9ffda85eae8afa0cbea9ed570de)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode ierr;
15   PetscInt       i,*idxb = 0;
16   PetscScalar    *va,*vb;
17   Vec            vtmp;
18 
19   PetscFunctionBegin;
20   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
21   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
22   if (idx) {
23     for (i=0; i<A->rmap->n; i++) {
24       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
25     }
26   }
27 
28   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
29   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
30   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
31   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
32 
33   for (i=0; i<A->rmap->n; i++) {
34     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
35       va[i] = vb[i];
36       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
37     }
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non-scalable way since the
73    length of colmap is proportional to the global number of block columns.
74 */
75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
76 {
77   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
78   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
79   PetscErrorCode ierr;
80   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
81 
82   PetscFunctionBegin;
83 #if defined(PETSC_USE_CTABLE)
84   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
85   for (i=0; i<nbs; i++) {
86     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
87   }
88 #else
89   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
90   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
91   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
92 #endif
93   PetscFunctionReturn(0);
94 }
95 
/*
  MatSetValues_SeqBAIJ_A_Private - Inserts or adds ONE point value at point
  coordinates (row,col) of the diagonal block baij->A.

  This macro reads and updates many local variables of MatSetValues_MPIBAIJ()
  (rp, ap, aa, ai, aj, aimax, ailen, bs, bs2, low, high, nrow, ...), which is
  why that function declares so much state.  orow/ocol are the original global
  indices, used only in error messages.  Strategy: binary search down to at
  most 3 candidates, then linear scan; if the block is absent and insertion is
  allowed (a->nonew), reallocate and shift the row to make room.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
131 
/*
  MatSetValues_SeqBAIJ_B_Private - Identical in structure to the _A_ variant
  above, but operates on the OFF-DIAGONAL block baij->B (variables rp/ap point
  into b's arrays: ba, bi, bj, bimax, bilen).  See the _A_ macro for the
  search/insert strategy; orow/ocol are original global indices for messages.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
167 
/*
  MatSetValues_MPIBAIJ - Inserts or adds values, addressed by POINT (not
  block) indices, into an MPIBAIJ matrix.

  Locally owned rows are handled immediately: entries whose column falls in
  the diagonal ownership range go into baij->A, the rest into baij->B (after
  translating the global block column to B's local numbering via colmap).
  Rows owned by other processes are stashed for communication during
  MatAssemblyBegin/End, unless donotstash is set.

  The insertion itself is performed by the MatSetValues_SeqBAIJ_{A,B}_Private
  macros above, which operate on the function-scope variables declared below.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative row indices are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else { /* off-diagonal block B */
          if (mat->was_assembled) {
            /* colmap translates the global block column to B's local numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1; /* colmap stores values shifted by +1; -1 means absent */
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* block column not yet in B: revert to the unassembled state in
                 which B is addressed by GLOBAL columns, then insert */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs; /* add point offset within the block */
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
250 
251 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
252 {
253   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
254   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
255   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
256   PetscErrorCode    ierr;
257   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
258   PetscBool         roworiented=a->roworiented;
259   const PetscScalar *value     = v;
260   MatScalar         *ap,*aa = a->a,*bap;
261 
262   PetscFunctionBegin;
263   rp   = aj + ai[row];
264   ap   = aa + bs2*ai[row];
265   rmax = imax[row];
266   nrow = ailen[row];
267   value = v;
268   low = 0;
269   high = nrow;
270   while (high-low > 7) {
271     t = (low+high)/2;
272     if (rp[t] > col) high = t;
273     else             low  = t;
274   }
275   for (i=low; i<high; i++) {
276     if (rp[i] > col) break;
277     if (rp[i] == col) {
278       bap = ap +  bs2*i;
279       if (roworiented) {
280         if (is == ADD_VALUES) {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=ii; jj<bs2; jj+=bs) {
283               bap[jj] += *value++;
284             }
285           }
286         } else {
287           for (ii=0; ii<bs; ii++) {
288             for (jj=ii; jj<bs2; jj+=bs) {
289               bap[jj] = *value++;
290             }
291           }
292         }
293       } else {
294         if (is == ADD_VALUES) {
295           for (ii=0; ii<bs; ii++,value+=bs) {
296             for (jj=0; jj<bs; jj++) {
297               bap[jj] += value[jj];
298             }
299             bap += bs;
300           }
301         } else {
302           for (ii=0; ii<bs; ii++,value+=bs) {
303             for (jj=0; jj<bs; jj++) {
304               bap[jj]  = value[jj];
305             }
306             bap += bs;
307           }
308         }
309       }
310       goto noinsert2;
311     }
312   }
313   if (nonew == 1) goto noinsert2;
314   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
315   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
316   N = nrow++ - 1; high++;
317   /* shift up all the later entries in this row */
318   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
319   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
320   rp[i] = col;
321   bap   = ap +  bs2*i;
322   if (roworiented) {
323     for (ii=0; ii<bs; ii++) {
324       for (jj=ii; jj<bs2; jj+=bs) {
325         bap[jj] = *value++;
326       }
327     }
328   } else {
329     for (ii=0; ii<bs; ii++) {
330       for (jj=0; jj<bs; jj++) {
331         *bap++ = *value++;
332       }
333     }
334   }
335   noinsert2:;
336   ailen[row] = nrow;
337   PetscFunctionReturn(0);
338 }
339 
340 /*
341     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
342     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
343 */
/*
  MatSetValuesBlocked_MPIBAIJ - Inserts or adds bs x bs blocks of values,
  addressed by BLOCK row/column indices, into an MPIBAIJ matrix.

  For each local block: when the input layout lets one block be addressed
  contiguously (single block column in row-oriented mode, or single block row
  in column-oriented mode) v is aliased directly; otherwise the block is
  gathered into the reusable scratch buffer baij->barray.  Columns inside the
  diagonal ownership range go to baij->A, others to baij->B (global block
  column translated through colmap).  Off-process rows are stashed.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the one-block scratch buffer */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride between consecutive rows of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        /* note: the const is cast away here, but the aliased data is only
           read by MatSetValuesBlocked_SeqBAIJ_Inlined() below */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the gathered block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
        else { /* off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs; /* convert stored point column back to a block column */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* block column not yet in B: revert to global-column addressing */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
436 
/* Multiplicative hashing (Knuth): the key is scaled by the fractional part
   of the golden ratio and the fractional part of the product picks a slot in
   [0,size).  tmp is a PetscReal scratch variable supplied by the caller. */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
  MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues_MPIBAIJ():
  point values may only be written onto ALREADY EXISTING nonzero block
  locations, which are looked up in the hash table built by
  MatCreateHashTable_MPIBAIJ_Private() (keys are row*Nbs+col+1 in block
  coordinates; collisions are resolved by linear probing with wraparound).
  Off-process rows are stashed for assembly.  In debug builds, probe counts
  are accumulated into ht_total_ct/ht_insert_ct for diagnostics.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned point row */
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table: key is the block position + 1 */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probe forward from h1, wrapping to the start if needed */
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        /* HD[idx] points at the column-major bs x bs block holding this point */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) { /* off-process row */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
507 
508 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
509 {
510   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
511   PetscBool         roworiented = baij->roworiented;
512   PetscErrorCode    ierr;
513   PetscInt          i,j,ii,jj,row,col;
514   PetscInt          rstart=baij->rstartbs;
515   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
516   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
517   PetscReal         tmp;
518   MatScalar         **HD = baij->hd,*baij_a;
519   const PetscScalar *v_t,*value;
520   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
521 
522   PetscFunctionBegin;
523   if (roworiented) stepval = (n-1)*bs;
524   else stepval = (m-1)*bs;
525 
526   for (i=0; i<m; i++) {
527     if (PetscDefined(USE_DEBUG)) {
528       if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
529       if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
530     }
531     row = im[i];
532     v_t = v + i*nbs2;
533     if (row >= rstart && row < rend) {
534       for (j=0; j<n; j++) {
535         col = in[j];
536 
537         /* Look up into the Hash Table */
538         key = row*Nbs+col+1;
539         h1  = HASH(size,key,tmp);
540 
541         idx = h1;
542         if (PetscDefined(USE_DEBUG)) {
543           total_ct++;
544           insert_ct++;
545           if (HT[idx] != key) {
546             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
547             if (idx == size) {
548               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
549               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
550             }
551           }
552         } else if (HT[idx] != key) {
553           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
554           if (idx == size) {
555             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
556             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
557           }
558         }
559         baij_a = HD[idx];
560         if (roworiented) {
561           /*value = v + i*(stepval+bs)*bs + j*bs;*/
562           /* value = v + (i*(stepval+bs)+j)*bs; */
563           value = v_t;
564           v_t  += bs;
565           if (addv == ADD_VALUES) {
566             for (ii=0; ii<bs; ii++,value+=stepval) {
567               for (jj=ii; jj<bs2; jj+=bs) {
568                 baij_a[jj] += *value++;
569               }
570             }
571           } else {
572             for (ii=0; ii<bs; ii++,value+=stepval) {
573               for (jj=ii; jj<bs2; jj+=bs) {
574                 baij_a[jj] = *value++;
575               }
576             }
577           }
578         } else {
579           value = v + j*(stepval+bs)*bs + i*bs;
580           if (addv == ADD_VALUES) {
581             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
582               for (jj=0; jj<bs; jj++) {
583                 baij_a[jj] += *value++;
584               }
585             }
586           } else {
587             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
588               for (jj=0; jj<bs; jj++) {
589                 baij_a[jj] = *value++;
590               }
591             }
592           }
593         }
594       }
595     } else {
596       if (!baij->donotstash) {
597         if (roworiented) {
598           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
599         } else {
600           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
601         }
602       }
603     }
604   }
605   if (PetscDefined(USE_DEBUG)) {
606     baij->ht_total_ct  += total_ct;
607     baij->ht_insert_ct += insert_ct;
608   }
609   PetscFunctionReturn(0);
610 }
611 
/*
  MatGetValues_MPIBAIJ - Retrieves values at the given global POINT indices.
  Only locally owned rows may be queried (off-process rows error out);
  columns may be anywhere.  Off-diagonal entries whose block column is not
  stored in B are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) { /* locally owned row */
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) { /* diagonal block A */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else { /* off-diagonal block B: translate column through colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--; /* colmap values are stored shifted by +1; -1 means absent */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* absent block column (or stale colmap entry): value is implicitly 0 */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* local point column inside B */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
652 
/*
  MatNorm_MPIBAIJ - Computes NORM_FROBENIUS, NORM_1 (max column sum) or
  NORM_INFINITY (max row sum) of an MPIBAIJ matrix.  With a single process
  the computation is delegated to the sequential diagonal block.  NORM_1 is
  not scalable in memory: two work arrays of global column length are used.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the local A and B parts, reduce, then sqrt */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart comes from rstartbs, i.e. this assumes the
         diagonal block's column ownership coincides with the row ownership
         (square layout) -- confirm for non-square partitionings */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      /* accumulate per-global-column |a_ij| sums; blocks are stored
         column-major so the inner loop walks the rows of one block column */
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j; /* garray maps B's block column to global */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      /* one pass per block row; sums[] holds the bs point-row sums */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
745 
746 /*
747   Creates and fills the hash table used for fast value insertion.
748   This table is created only once.
749   If new entries need to be added to the matrix
750   then the hash table has to be destroyed and
751   recreated.
752 */
/*
  MatCreateHashTable_MPIBAIJ_Private - Builds the open-addressing hash table
  used by the *_HT MatSetValues variants.

  For every stored block (in A and B) the key row*Nbs+col+1 (global block
  coordinates, +1 so 0 can mean "empty slot") is hashed and placed by linear
  probing with wraparound; HD[] records the address of the block's values so
  lookups can write directly into the matrix storage.  Table size is
  factor*nz slots; factor must exceed 1 or the table overflows.

  NOTE(review): if the probe loop exhausts all ht_size slots the entry is
  silently dropped (the break never triggers) -- later lookups would then
  error; confirm factor > 1 is enforced by the caller.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;  /* collision count and longest probe sequence */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0); /* table already built */

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* linear probing: take the first empty slot at or after h1 (mod size) */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]]; /* B's columns are translated to global numbering */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
837 
838 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
839 {
840   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
841   PetscErrorCode ierr;
842   PetscInt       nstash,reallocs;
843 
844   PetscFunctionBegin;
845   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
846 
847   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
848   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
849   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
850   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
851   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
852   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
853   PetscFunctionReturn(0);
854 }
855 
/* Completes assembly: drains the scalar and block stashes into the local
   matrices, handles disassembly consistency across ranks, builds the
   parallel scatter on first assembly, and optionally installs the
   hash-table fast path for MatSetValues(). */
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* receive the scalar-indexed stash messages and insert their entries */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed value here is a full bs2-sized block */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the lvec/Mvctx scatter used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, build the hash table and switch to the _HT SetValues kernels */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow work arrays; they are rebuilt on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
969 
970 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
971 #include <petscdraw.h>
/* Views the matrix on ASCII, draw, or socket viewers. INFO formats print
   per-rank summaries only; otherwise the whole matrix is gathered onto
   rank 0 and viewed as a sequential matrix. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    /* the INFO formats only print summary data and return early */
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global scalar rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one bs-long column of the block per call; values are column-ordered */
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part; garray maps compressed block columns to global ones */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* on rank 0 the gathered matrix's diagonal part holds everything */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1089 
1090 /* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;        /* global rows */
  N  = mat->cmap->N;        /* global columns */
  m  = mat->rmap->n;        /* local rows */
  rs = mat->rmap->rstart;   /* first local (scalar) row */
  cs = mat->cmap->rstart;   /* first local (scalar) column */
  bs = mat->rmap->bs;       /* block size */
  nz = bs*bs*(A->nz + B->nz); /* local scalar nonzeros */

  /* write matrix header: classid, dimensions, and the global nonzero count */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths (each scalar row of a block row has the same length) */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices: for each scalar row, B entries left of
     the diagonal block come first, then the A entries, then the rest of B,
     so the indices appear in ascending global order */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs;
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values in the same order as the column indices;
     blocks are stored column-major, so entry (k,l) of block j is a[bs*(bs*j+l)+k] */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1172 
1173 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1174 {
1175   PetscErrorCode ierr;
1176   PetscBool      iascii,isdraw,issocket,isbinary;
1177 
1178   PetscFunctionBegin;
1179   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1180   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1181   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1182   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1183   if (iascii || isdraw || issocket) {
1184     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1185   } else if (isbinary) {
1186     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1192 {
1193   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197 #if defined(PETSC_USE_LOG)
1198   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1199 #endif
1200   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1201   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1202   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1203   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1204 #if defined(PETSC_USE_CTABLE)
1205   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1206 #else
1207   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1208 #endif
1209   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1210   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1211   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1212   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1213   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1214   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1215   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1216   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1217 
1218   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1227 #if defined(PETSC_HAVE_HYPRE)
1228   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
1229 #endif
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1235 {
1236   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1237   PetscErrorCode ierr;
1238   PetscInt       nt;
1239 
1240   PetscFunctionBegin;
1241   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1242   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1243   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1244   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1245   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1247   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1248   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1253 {
1254   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1255   PetscErrorCode ierr;
1256 
1257   PetscFunctionBegin;
1258   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1259   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1260   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1266 {
1267   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1268   PetscErrorCode ierr;
1269 
1270   PetscFunctionBegin;
1271   /* do nondiagonal part */
1272   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1273   /* do local part */
1274   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1275   /* add partial results together */
1276   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1277   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1282 {
1283   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1284   PetscErrorCode ierr;
1285 
1286   PetscFunctionBegin;
1287   /* do nondiagonal part */
1288   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1289   /* do local part */
1290   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1291   /* add partial results together */
1292   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1293   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 /*
1298   This only works correctly for square matrices where the subblock A->A is the
1299    diagonal block
1300 */
1301 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1302 {
1303   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1304   PetscErrorCode ierr;
1305 
1306   PetscFunctionBegin;
1307   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1308   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1313 {
1314   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1319   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
/* Returns one local row of the parallel matrix, merging the diagonal (A)
   and off-diagonal (B) pieces into a single column-sorted row.
   Must be paired with MatRestoreRow(). */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  /* only one row may be "gotten" at a time; enforced via getrowactive */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;   /* local scalar row index */

  /* pass NULL for any output the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;   /* maps compressed block columns of B to global block columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark is the count of B entries whose global column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already known from the values pass */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1399 
1400 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1401 {
1402   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1403 
1404   PetscFunctionBegin;
1405   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1406   baij->getrowactive = PETSC_FALSE;
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1411 {
1412   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1417   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1422 {
1423   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1424   Mat            A  = a->A,B = a->B;
1425   PetscErrorCode ierr;
1426   PetscLogDouble isend[5],irecv[5];
1427 
1428   PetscFunctionBegin;
1429   info->block_size = (PetscReal)matin->rmap->bs;
1430 
1431   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1432 
1433   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1434   isend[3] = info->memory;  isend[4] = info->mallocs;
1435 
1436   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1437 
1438   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1439   isend[3] += info->memory;  isend[4] += info->mallocs;
1440 
1441   if (flag == MAT_LOCAL) {
1442     info->nz_used      = isend[0];
1443     info->nz_allocated = isend[1];
1444     info->nz_unneeded  = isend[2];
1445     info->memory       = isend[3];
1446     info->mallocs      = isend[4];
1447   } else if (flag == MAT_GLOBAL_MAX) {
1448     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1449 
1450     info->nz_used      = irecv[0];
1451     info->nz_allocated = irecv[1];
1452     info->nz_unneeded  = irecv[2];
1453     info->memory       = irecv[3];
1454     info->mallocs      = irecv[4];
1455   } else if (flag == MAT_GLOBAL_SUM) {
1456     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1457 
1458     info->nz_used      = irecv[0];
1459     info->nz_allocated = irecv[1];
1460     info->nz_unneeded  = irecv[2];
1461     info->memory       = irecv[3];
1462     info->mallocs      = irecv[4];
1463   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1464   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1465   info->fill_ratio_needed = 0;
1466   info->factor_mallocs    = 0;
1467   PetscFunctionReturn(0);
1468 }
1469 
/* Sets a matrix option, forwarding it to the sequential parts as required.
   Nonzero-pattern options go to both A and B; symmetry-type options only
   to A; a few options are handled purely in the parallel layer. */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-pattern related options: forward to both sequential parts */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* record locally as well, since MatSetValues_MPIBAIJ consults this flag */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* handled entirely in the parallel layer (controls stashing) */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* hash table is built lazily at the next final assembly; 1.39 is the
       default sizing factor relative to the nonzero count */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  /* symmetry/structure options: only meaningful for the diagonal part */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1517 
/* Forms the transpose by inserting each local block, one block column at a
   time, with the row/column roles swapped into a new (or reused) matrix. */
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the transposed matrix with swapped row/column layouts */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    /* rvals holds the bs consecutive global scalar rows of block row i */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert one bs-long block column into row 'col' of B (transposed) */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part; garray maps compressed block columns to global ones */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* for in-place transpose, replace A's guts with B's and free the shell */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1581 
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll) * mat * diag(rr);
   either ll or rr may be NULL.

   The scatter of rr into the ghosted vector lvec is started first so the
   communication overlaps the (purely local) left scaling; the off-diagonal
   block is right-scaled only after the scatter has completed.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling uses only locally owned entries of ll */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1612 
1613 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1614 {
1615   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1616   PetscInt      *lrows;
1617   PetscInt       r, len;
1618   PetscBool      cong;
1619   PetscErrorCode ierr;
1620 
1621   PetscFunctionBegin;
1622   /* get locally owned rows */
1623   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1624   /* fix right hand side if needed */
1625   if (x && b) {
1626     const PetscScalar *xx;
1627     PetscScalar       *bb;
1628 
1629     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1630     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1631     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1632     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1633     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1634   }
1635 
1636   /* actually zap the local rows */
1637   /*
1638         Zero the required rows. If the "diagonal block" of the matrix
1639      is square and the user wishes to set the diagonal we use separate
1640      code so that MatSetValues() is not called for each diagonal allocating
1641      new memory, thus calling lots of mallocs and slowing things down.
1642 
1643   */
1644   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1645   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1646   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1647   if ((diag != 0.0) && cong) {
1648     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1649   } else if (diag != 0.0) {
1650     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1651     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1652        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1653     for (r = 0; r < len; ++r) {
1654       const PetscInt row = lrows[r] + A->rmap->rstart;
1655       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1656     }
1657     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1658     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1659   } else {
1660     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1661   }
1662   ierr = PetscFree(lrows);CHKERRQ(ierr);
1663 
1664   /* only change matrix nonzero state if pattern was allowed to be changed */
1665   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1666     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1667     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1668   }
1669   PetscFunctionReturn(0);
1670 }
1671 
/*
   MatZeroRowsColumns_MPIBAIJ - Zeros the rows AND columns of A listed (by
   global index) in rows[], optionally placing diag on the diagonal, and when
   x and b are given adjusts b to account for the removed columns.

   Strategy:
     1. Use a PetscSF reduction to deliver each row index to its owner.
     2. Delegate the square diagonal block to MatZeroRowsColumns on l->A.
     3. Build a ghosted 0/1 mask (lmask) marking zeroed columns, scatter it
        with the matrix's Mvctx, then zero the matching rows and columns of
        the off-diagonal block by direct manipulation of its BAIJ arrays.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  /* build the 0/1 column mask and scatter it to the ghosted layout */
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* need ghosted x values to correct the rhs for removed columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* number of point entries stored in point row 'row' of the BAIJ structure */
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    /* start of row (row%bs) within the first block of block row row/bs;
       consecutive entries of that point row are bs apart (blocks stored densely) */
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;   /* point column in the compressed (ghost) numbering */
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];   /* move known value to the rhs */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1767 
1768 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1769 {
1770   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1771   PetscErrorCode ierr;
1772 
1773   PetscFunctionBegin;
1774   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1779 
1780 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1781 {
1782   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1783   Mat            a,b,c,d;
1784   PetscBool      flg;
1785   PetscErrorCode ierr;
1786 
1787   PetscFunctionBegin;
1788   a = matA->A; b = matA->B;
1789   c = matB->A; d = matB->B;
1790 
1791   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1792   if (flg) {
1793     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1794   }
1795   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1800 {
1801   PetscErrorCode ierr;
1802   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1803   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1804 
1805   PetscFunctionBegin;
1806   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1807   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1808     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1809   } else {
1810     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1811     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1812   }
1813   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1818 {
1819   PetscErrorCode ierr;
1820 
1821   PetscFunctionBegin;
1822   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1827 {
1828   PetscErrorCode ierr;
1829   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1830   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1831   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1832 
1833   PetscFunctionBegin;
1834   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
/*
   MatAXPY_MPIBAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN:    a single BLAS axpy over the raw value arrays of
                            each local block (patterns assumed identical).
   SUBSET_NONZERO_PATTERN:  generic entry-wise MatAXPY_Basic().
   otherwise:               build a correctly preallocated matrix B holding
                            the union pattern, compute into it, then replace
                            Y's content with B via MatHeaderReplace().
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: nz block entries of bs2 scalars each */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    /* off-diagonal blocks */
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    /* counts are per block row; rmap->N of the sequential blocks bounds them */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1881 
1882 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1883 {
1884   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1885   PetscErrorCode ierr;
1886 
1887   PetscFunctionBegin;
1888   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1889   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1894 {
1895   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1896   PetscErrorCode ierr;
1897 
1898   PetscFunctionBegin;
1899   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1900   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1905 {
1906   PetscErrorCode ierr;
1907   IS             iscol_local;
1908   PetscInt       csize;
1909 
1910   PetscFunctionBegin;
1911   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1912   if (call == MAT_REUSE_MATRIX) {
1913     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1914     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1915   } else {
1916     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1917   }
1918   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1919   if (call == MAT_INITIAL_MATRIX) {
1920     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1921     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1922   }
1923   PetscFunctionReturn(0);
1924 }
1925 
/*
  MatCreateSubMatrix_MPIBAIJ_Private - Workhorse for MatCreateSubMatrix_MPIBAIJ().

  Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).

  The compression and expansion of the index sets should be avoided: it does
  not flag errors and might change the indices, hence potentially buggy.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* convert point-index ISs to block-index ISs (see note in header comment) */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  /* build (or refill) the local sequential copy of the requested rows/cols */
  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    /* both preallocations are attempted since this routine serves BAIJ and
       SBAIJ; only the one matching M's type takes effect */
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* copy the local sequential submatrix into the distributed result,
     one block row at a time */
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2056 
2057 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2058 {
2059   MPI_Comm       comm,pcomm;
2060   PetscInt       clocal_size,nrows;
2061   const PetscInt *rows;
2062   PetscMPIInt    size;
2063   IS             crowp,lcolp;
2064   PetscErrorCode ierr;
2065 
2066   PetscFunctionBegin;
2067   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2068   /* make a collective version of 'rowp' */
2069   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2070   if (pcomm==comm) {
2071     crowp = rowp;
2072   } else {
2073     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2074     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2075     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2076     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2077   }
2078   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2079   /* make a local version of 'colp' */
2080   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2081   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2082   if (size==1) {
2083     lcolp = colp;
2084   } else {
2085     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2086   }
2087   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2088   /* now we just get the submatrix */
2089   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2090   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2091   /* clean up */
2092   if (pcomm!=comm) {
2093     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2094   }
2095   if (size>1) {
2096     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2097   }
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2102 {
2103   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2104   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2105 
2106   PetscFunctionBegin;
2107   if (nghosts) *nghosts = B->nbs;
2108   if (ghosts) *ghosts = baij->garray;
2109   PetscFunctionReturn(0);
2110 }
2111 
/*
   MatGetSeqNonzeroStructure_MPIBAIJ - Builds, on every process, a sequential
   AIJ matrix holding the block nonzero structure of the entire parallel
   matrix (one entry per bs x bs block; numerical values are not set).

   Two MPI_Allgatherv rounds are used: first the per-block-row lengths, then
   the global block-column indices themselves, written directly into the
   new matrix's j array.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  /* local block-row lengths = diagonal-part length + off-diagonal-part length */
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  /* one allocation serves both recvcounts and displs */
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  /* write this rank's indices in place, at its slice of B's j array */
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  /* merge the diagonal and off-diagonal parts of each block row so the
     resulting global column indices come out sorted */
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information to the structure-only matrix */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2230 
2231 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2232 {
2233   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2234   PetscErrorCode ierr;
2235   Vec            bb1 = 0;
2236 
2237   PetscFunctionBegin;
2238   if (flag == SOR_APPLY_UPPER) {
2239     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2240     PetscFunctionReturn(0);
2241   }
2242 
2243   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2244     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2245   }
2246 
2247   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2248     if (flag & SOR_ZERO_INITIAL_GUESS) {
2249       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2250       its--;
2251     }
2252 
2253     while (its--) {
2254       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2255       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2256 
2257       /* update rhs: bb1 = bb - B*x */
2258       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2259       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2260 
2261       /* local sweep */
2262       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2263     }
2264   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2265     if (flag & SOR_ZERO_INITIAL_GUESS) {
2266       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2267       its--;
2268     }
2269     while (its--) {
2270       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2271       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2272 
2273       /* update rhs: bb1 = bb - B*x */
2274       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2275       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2276 
2277       /* local sweep */
2278       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2279     }
2280   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2281     if (flag & SOR_ZERO_INITIAL_GUESS) {
2282       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2283       its--;
2284     }
2285     while (its--) {
2286       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2287       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2288 
2289       /* update rhs: bb1 = bb - B*x */
2290       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2291       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2292 
2293       /* local sweep */
2294       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2295     }
2296   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2297 
2298   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2303 {
2304   PetscErrorCode ierr;
2305   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2306   PetscInt       N,i,*garray = aij->garray;
2307   PetscInt       ib,jb,bs = A->rmap->bs;
2308   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2309   MatScalar      *a_val = a_aij->a;
2310   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2311   MatScalar      *b_val = b_aij->a;
2312   PetscReal      *work;
2313 
2314   PetscFunctionBegin;
2315   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2316   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2317   if (type == NORM_2) {
2318     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2319       for (jb=0; jb<bs; jb++) {
2320         for (ib=0; ib<bs; ib++) {
2321           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2322           a_val++;
2323         }
2324       }
2325     }
2326     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2327       for (jb=0; jb<bs; jb++) {
2328         for (ib=0; ib<bs; ib++) {
2329           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2330           b_val++;
2331         }
2332       }
2333     }
2334   } else if (type == NORM_1) {
2335     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2336       for (jb=0; jb<bs; jb++) {
2337         for (ib=0; ib<bs; ib++) {
2338           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2339           a_val++;
2340         }
2341       }
2342     }
2343     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2344       for (jb=0; jb<bs; jb++) {
2345        for (ib=0; ib<bs; ib++) {
2346           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2347           b_val++;
2348         }
2349       }
2350     }
2351   } else if (type == NORM_INFINITY) {
2352     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2353       for (jb=0; jb<bs; jb++) {
2354         for (ib=0; ib<bs; ib++) {
2355           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2356           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2357           a_val++;
2358         }
2359       }
2360     }
2361     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2362       for (jb=0; jb<bs; jb++) {
2363         for (ib=0; ib<bs; ib++) {
2364           int col = garray[b_aij->j[i]] * bs + jb;
2365           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2366           b_val++;
2367         }
2368       }
2369     }
2370   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2371   if (type == NORM_INFINITY) {
2372     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2373   } else {
2374     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2375   }
2376   ierr = PetscFree(work);CHKERRQ(ierr);
2377   if (type == NORM_2) {
2378     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2379   }
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2384 {
2385   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2386   PetscErrorCode ierr;
2387 
2388   PetscFunctionBegin;
2389   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2390   A->factorerrortype             = a->A->factorerrortype;
2391   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2392   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2397 {
2398   PetscErrorCode ierr;
2399   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2400   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2401 
2402   PetscFunctionBegin;
2403   if (!Y->preallocated) {
2404     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2405   } else if (!aij->nz) {
2406     PetscInt nonew = aij->nonew;
2407     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2408     aij->nonew = nonew;
2409   }
2410   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2415 {
2416   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2417   PetscErrorCode ierr;
2418 
2419   PetscFunctionBegin;
2420   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2421   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2422   if (d) {
2423     PetscInt rstart;
2424     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2425     *d += rstart/A->rmap->bs;
2426 
2427   }
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2432 {
2433   PetscFunctionBegin;
2434   *a = ((Mat_MPIBAIJ*)A->data)->A;
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 /* -------------------------------------------------------------------*/
2439 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2440                                        MatGetRow_MPIBAIJ,
2441                                        MatRestoreRow_MPIBAIJ,
2442                                        MatMult_MPIBAIJ,
2443                                 /* 4*/ MatMultAdd_MPIBAIJ,
2444                                        MatMultTranspose_MPIBAIJ,
2445                                        MatMultTransposeAdd_MPIBAIJ,
2446                                        0,
2447                                        0,
2448                                        0,
2449                                 /*10*/ 0,
2450                                        0,
2451                                        0,
2452                                        MatSOR_MPIBAIJ,
2453                                        MatTranspose_MPIBAIJ,
2454                                 /*15*/ MatGetInfo_MPIBAIJ,
2455                                        MatEqual_MPIBAIJ,
2456                                        MatGetDiagonal_MPIBAIJ,
2457                                        MatDiagonalScale_MPIBAIJ,
2458                                        MatNorm_MPIBAIJ,
2459                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2460                                        MatAssemblyEnd_MPIBAIJ,
2461                                        MatSetOption_MPIBAIJ,
2462                                        MatZeroEntries_MPIBAIJ,
2463                                 /*24*/ MatZeroRows_MPIBAIJ,
2464                                        0,
2465                                        0,
2466                                        0,
2467                                        0,
2468                                 /*29*/ MatSetUp_MPIBAIJ,
2469                                        0,
2470                                        0,
2471                                        MatGetDiagonalBlock_MPIBAIJ,
2472                                        0,
2473                                 /*34*/ MatDuplicate_MPIBAIJ,
2474                                        0,
2475                                        0,
2476                                        0,
2477                                        0,
2478                                 /*39*/ MatAXPY_MPIBAIJ,
2479                                        MatCreateSubMatrices_MPIBAIJ,
2480                                        MatIncreaseOverlap_MPIBAIJ,
2481                                        MatGetValues_MPIBAIJ,
2482                                        MatCopy_MPIBAIJ,
2483                                 /*44*/ 0,
2484                                        MatScale_MPIBAIJ,
2485                                        MatShift_MPIBAIJ,
2486                                        0,
2487                                        MatZeroRowsColumns_MPIBAIJ,
2488                                 /*49*/ 0,
2489                                        0,
2490                                        0,
2491                                        0,
2492                                        0,
2493                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2494                                        0,
2495                                        MatSetUnfactored_MPIBAIJ,
2496                                        MatPermute_MPIBAIJ,
2497                                        MatSetValuesBlocked_MPIBAIJ,
2498                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2499                                        MatDestroy_MPIBAIJ,
2500                                        MatView_MPIBAIJ,
2501                                        0,
2502                                        0,
2503                                 /*64*/ 0,
2504                                        0,
2505                                        0,
2506                                        0,
2507                                        0,
2508                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2509                                        0,
2510                                        0,
2511                                        0,
2512                                        0,
2513                                 /*74*/ 0,
2514                                        MatFDColoringApply_BAIJ,
2515                                        0,
2516                                        0,
2517                                        0,
2518                                 /*79*/ 0,
2519                                        0,
2520                                        0,
2521                                        0,
2522                                        MatLoad_MPIBAIJ,
2523                                 /*84*/ 0,
2524                                        0,
2525                                        0,
2526                                        0,
2527                                        0,
2528                                 /*89*/ 0,
2529                                        0,
2530                                        0,
2531                                        0,
2532                                        0,
2533                                 /*94*/ 0,
2534                                        0,
2535                                        0,
2536                                        0,
2537                                        0,
2538                                 /*99*/ 0,
2539                                        0,
2540                                        0,
2541                                        0,
2542                                        0,
2543                                 /*104*/0,
2544                                        MatRealPart_MPIBAIJ,
2545                                        MatImaginaryPart_MPIBAIJ,
2546                                        0,
2547                                        0,
2548                                 /*109*/0,
2549                                        0,
2550                                        0,
2551                                        0,
2552                                        MatMissingDiagonal_MPIBAIJ,
2553                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2554                                        0,
2555                                        MatGetGhosts_MPIBAIJ,
2556                                        0,
2557                                        0,
2558                                 /*119*/0,
2559                                        0,
2560                                        0,
2561                                        0,
2562                                        MatGetMultiProcBlock_MPIBAIJ,
2563                                 /*124*/0,
2564                                        MatGetColumnNorms_MPIBAIJ,
2565                                        MatInvertBlockDiagonal_MPIBAIJ,
2566                                        0,
2567                                        0,
2568                                /*129*/ 0,
2569                                        0,
2570                                        0,
2571                                        0,
2572                                        0,
2573                                /*134*/ 0,
2574                                        0,
2575                                        0,
2576                                        0,
2577                                        0,
2578                                /*139*/ MatSetBlockSizes_Default,
2579                                        0,
2580                                        0,
2581                                        MatFDColoringSetUp_MPIXAIJ,
2582                                        0,
2583                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2584 };
2585 
2586 
2587 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2588 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2589 
/*
   MatMPIBAIJSetPreallocationCSR_MPIBAIJ - MPIBAIJ implementation of
   MatMPIBAIJSetPreallocationCSR().

   ii[]/jj[] describe this process's rows in *block* CSR form (ii in local
   block rows, jj holding global block column indices); V, when non-NULL,
   supplies bs*bs scalars per block.  The routine counts diagonal and
   off-diagonal blocks per block row to preallocate exactly, inserts the
   blocks, and assembles; afterwards the nonzero pattern is frozen.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* local size and ownership ranges in block units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  /* count diagonal (dlen) and off-diagonal (olen) blocks in each block row */
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* no values supplied: insert zeros; nz_max zero blocks suffice for the widest row */
  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* every inserted entry is locally owned by construction, so suppress the
     off-process stash communication during assembly, then restore the flag */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  /* the CSR pattern is exact; introducing new nonzero locations later is an error */
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2661 
2662 /*@C
2663    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2664 
2665    Collective
2666 
2667    Input Parameters:
2668 +  B - the matrix
2669 .  bs - the block size
2670 .  i - the indices into j for the start of each local row (starts with zero)
2671 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2672 -  v - optional values in the matrix
2673 
2674    Level: advanced
2675 
2676    Notes:
2677     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2678    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2679    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2680    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2681    block column and the second index is over columns within a block.
2682 
2683    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2684 
2685 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2686 @*/
/* Validate the arguments, then dispatch to the matrix type's implementation
   (a no-op for types that do not compose MatMPIBAIJSetPreallocationCSR_C). */
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2698 
/*
   MatMPIBAIJSetPreallocation_MPIBAIJ - MPIBAIJ implementation of
   MatMPIBAIJSetPreallocation(): fixes the block size, caches the layout in
   block units, discards data structures from any previous preallocation, and
   (re)creates the sequential diagonal (A) and off-diagonal (B) pieces with
   the requested per-row block counts.
*/
PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  /* validate the user-supplied per-block-row counts */
  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  /* cache matrix sizes and ownership ranges expressed in block units */
  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  /* drop structures left over from a previous preallocation/assembly */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* on a single process there is no off-diagonal part, hence zero columns */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    /* stash for block-valued entries destined for other processes */
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2770 
2771 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2772 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2773 
/*
   MatConvert_MPIBAIJ_MPIAdj - Builds a block-level adjacency graph (MPIADJ)
   from an MPIBAIJ matrix: one vertex per block row/column, with the diagonal
   blocks omitted from the edge lists.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* row offsets: total blocks in the row, minus one when the row has a diagonal block */
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count when the row contains a diagonal block */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* Column lists merged in ascending global order: off-diagonal blocks to the
     left of the owned range, then owned blocks (excluding the diagonal itself),
     then the remaining off-diagonal blocks.
     NOTE(review): relies on jd/jo being sorted ascending within each row and on
     garray being increasing -- confirm against the BAIJ assembly code. */
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  /* ii and jj are intentionally not freed here: MatCreateMPIAdj takes them over */
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2813 
2814 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2815 
2816 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2817 
/*
   MatConvert_MPIBAIJ_MPIAIJ - Converts an assembled MPIBAIJ matrix to MPIAIJ
   by converting its sequential diagonal (A) and off-diagonal (B) pieces with
   MatConvert_SeqBAIJ_SeqAIJ and installing them in the target matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create an empty MPIAIJ shell with matching layout; the real sequential
       pieces are substituted below */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* convert in place into B's existing sequential pieces */
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    /* A is temporarily disassembled before converting its pieces and restored
       by the reassembly below.  NOTE(review): disassembly appears to put a->B
       back into global column numbering for the conversion -- confirm against
       MatDisAssemble_MPIBAIJ. */
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's contents with B's, destroying the BAIJ representation */
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2862 
2863 /*MC
2864    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2865 
2866    Options Database Keys:
2867 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2868 . -mat_block_size <bs> - set the blocksize used to store the matrix
2869 - -mat_use_hash_table <fact>
2870 
2871    Level: beginner
2872 
2873    Notes:
2874     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
2875     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2876 
2877 .seealso: MatCreateBAIJ
2878 M*/
2879 
2880 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2881 
/*
   MatCreate_MPIBAIJ - Constructor registered for MATMPIBAIJ: installs the
   method table, initializes the Mat_MPIBAIJ context to its empty state,
   composes the conversion/preallocation methods, and processes the
   -mat_use_hash_table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;


  /* register the type-specific implementations discoverable by PetscTryMethod()/
     PetscUseMethod(); the composed names must match the query strings exactly */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  /* NOTE(review): the same option name is queried twice -- first for presence,
     then again as a real to obtain the hash table factor */
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2963 
2964 /*MC
2965    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2966 
2967    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2968    and MATMPIBAIJ otherwise.
2969 
2970    Options Database Keys:
2971 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2972 
2973   Level: beginner
2974 
2975 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2976 M*/
2977 
2978 /*@C
2979    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2980    (block compressed row).  For good matrix assembly performance
2981    the user should preallocate the matrix storage by setting the parameters
2982    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2983    performance can be increased by more than a factor of 50.
2984 
2985    Collective on Mat
2986 
2987    Input Parameters:
2988 +  B - the matrix
2989 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2990           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2991 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2992            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
2997 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2998            submatrix (same for all local rows).
2999 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3000            off-diagonal portion of the local submatrix (possibly different for
3001            each block row) or NULL.
3002 
3003    If the *_nnz parameter is given then the *_nz parameter is ignored
3004 
3005    Options Database Keys:
3006 +   -mat_block_size - size of the blocks to use
3007 -   -mat_use_hash_table <fact>
3008 
3009    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3012 
3013    Storage Information:
3014    For a square global matrix we define each processor's diagonal portion
3015    to be its local rows and the corresponding columns (a square submatrix);
3016    each processor's off-diagonal portion encompasses the remainder of the
3017    local matrix (a rectangular submatrix).
3018 
3019    The user can specify preallocated storage for the diagonal part of
3020    the local submatrix with either d_nz or d_nnz (not both).  Set
3021    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3022    memory allocation.  Likewise, specify preallocated storage for the
3023    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3024 
3025    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3026    the figure below we depict these three local rows and all columns (0-11).
3027 
3028 .vb
3029            0 1 2 3 4 5 6 7 8 9 10 11
3030           --------------------------
3031    row 3  |o o o d d d o o o o  o  o
3032    row 4  |o o o d d d o o o o  o  o
3033    row 5  |o o o d d d o o o o  o  o
3034           --------------------------
3035 .ve
3036 
3037    Thus, any entries in the d locations are stored in the d (diagonal)
3038    submatrix, and any entries in the o locations are stored in the
3039    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3040    stored simply in the MATSEQBAIJ format for compressed row storage.
3041 
3042    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3043    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3044    In general, for PDE problems in which most nonzeros are near the diagonal,
3045    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3046    or you will get TERRIBLE performance; see the users' manual chapter on
3047    matrices.
3048 
3049    You can call MatGetInfo() to get information on how effective the preallocation was;
3050    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3051    You can also run with the option -info and look for messages with the string
3052    malloc in them to see if additional memory allocation was needed.
3053 
3054    Level: intermediate
3055 
3056 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3057 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Validate the collective arguments, then dispatch to the type-specific
     implementation composed as "MatMPIBAIJSetPreallocation_C" (set up in
     MatCreate_MPIBAIJ).  PetscTryMethod() silently does nothing when the
     method is not composed on B, e.g. for a non-MPIBAIJ matrix type. */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3069 
3070 /*@C
3071    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3072    (block compressed row).  For good matrix assembly performance
3073    the user should preallocate the matrix storage by setting the parameters
3074    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3075    performance can be increased by more than a factor of 50.
3076 
3077    Collective
3078 
3079    Input Parameters:
3080 +  comm - MPI communicator
3081 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3082           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3083 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3084            This value should be the same as the local size used in creating the
3085            y vector for the matrix-vector product y = Ax.
3086 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3087            This value should be the same as the local size used in creating the
3088            x vector for the matrix-vector product y = Ax.
3089 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3090 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3091 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3092            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3097 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3098            submatrix (same for all local rows).
3099 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3100            off-diagonal portion of the local submatrix (possibly different for
3101            each block row) or NULL.
3102 
3103    Output Parameter:
3104 .  A - the matrix
3105 
3106    Options Database Keys:
3107 +   -mat_block_size - size of the blocks to use
3108 -   -mat_use_hash_table <fact>
3109 
3110    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3111    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3112    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3113 
3114    Notes:
3115    If the *_nnz parameter is given then the *_nz parameter is ignored
3116 
   A nonzero block is any block that has 1 or more nonzeros in it
3118 
3119    The user MUST specify either the local or global matrix dimensions
3120    (possibly both).
3121 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3124 
3125    Storage Information:
3126    For a square global matrix we define each processor's diagonal portion
3127    to be its local rows and the corresponding columns (a square submatrix);
3128    each processor's off-diagonal portion encompasses the remainder of the
3129    local matrix (a rectangular submatrix).
3130 
3131    The user can specify preallocated storage for the diagonal part of
3132    the local submatrix with either d_nz or d_nnz (not both).  Set
3133    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3134    memory allocation.  Likewise, specify preallocated storage for the
3135    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3136 
3137    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3138    the figure below we depict these three local rows and all columns (0-11).
3139 
3140 .vb
3141            0 1 2 3 4 5 6 7 8 9 10 11
3142           --------------------------
3143    row 3  |o o o d d d o o o o  o  o
3144    row 4  |o o o d d d o o o o  o  o
3145    row 5  |o o o d d d o o o o  o  o
3146           --------------------------
3147 .ve
3148 
3149    Thus, any entries in the d locations are stored in the d (diagonal)
3150    submatrix, and any entries in the o locations are stored in the
3151    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3152    stored simply in the MATSEQBAIJ format for compressed row storage.
3153 
3154    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3155    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3156    In general, for PDE problems in which most nonzeros are near the diagonal,
3157    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3158    or you will get TERRIBLE performance; see the users' manual chapter on
3159    matrices.
3160 
3161    Level: intermediate
3162 
3163 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3164 @*/
3165 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3166 {
3167   PetscErrorCode ierr;
3168   PetscMPIInt    size;
3169 
3170   PetscFunctionBegin;
3171   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3172   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3173   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3174   if (size > 1) {
3175     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3176     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3177   } else {
3178     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3179     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3180   }
3181   PetscFunctionReturn(0);
3182 }
3183 
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  /* Create the new matrix shell with the same sizes and type as matin;
     MatSetType() allocates the Mat_MPIBAIJ data structure we fill in below. */
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* the duplicate is marked assembled/preallocated since its data is copied below */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-size bookkeeping (blocks-squared size, local/global block counts) */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share (reference-count) the row/column layouts rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* copy scalar state; per-call scratch (rowindices/rowvalues/barray) starts empty */
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: the table itself is not copied, only the configuration */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* copy the block ownership ranges, and the global-to-local column map
     (stored either as a PetscTable or a dense array, depending on build options) */
  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* copy the compacted list of nonlocal block columns (garray) when present */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  /* fresh block stash; duplicate the off-process communication vector/scatter */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* duplicate the diagonal (A) and off-diagonal (B) sequential blocks;
     cpvalues controls whether numerical values are copied */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3266 
3267 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3268 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3269 {
3270   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3271   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3272   PetscScalar    *matvals;
3273   PetscErrorCode ierr;
3274 
3275   PetscFunctionBegin;
3276   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3277 
3278   /* read in matrix header */
3279   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3280   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3281   M  = header[1]; N = header[2]; nz = header[3];
3282   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3283   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3284   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3285 
3286   /* set block sizes from the viewer's .info file */
3287   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3288   /* set local sizes if not set already */
3289   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3290   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3291   /* set global sizes if not set already */
3292   if (mat->rmap->N < 0) mat->rmap->N = M;
3293   if (mat->cmap->N < 0) mat->cmap->N = N;
3294   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3295   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3296 
3297   /* check if the matrix sizes are correct */
3298   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3299   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3300   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3301   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3302   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);
3303   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);
3304   mbs = m/bs; nbs = n/bs;
3305 
3306   /* read in row lengths and build row indices */
3307   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3308   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3309   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3310   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3311   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3312 
3313   /* read in column indices and matrix values */
3314   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3315   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3316   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3317 
3318   { /* preallocate matrix storage */
3319     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3320     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3321     PetscBool  sbaij,done;
3322     PetscInt   *d_nnz,*o_nnz;
3323 
3324     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3325     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3326     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3327     ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
3328     for (i=0; i<mbs; i++) {
3329       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3330       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3331       for (k=0; k<bs; k++) {
3332         PetscInt row = bs*i + k;
3333         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3334           PetscInt col = colidxs[j];
3335           if (!sbaij || col >= row) {
3336             if (col >= cs && col < ce) {
3337               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3338             } else {
3339               ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
3340               if (done) o_nnz[i]++;
3341             }
3342           }
3343         }
3344       }
3345     }
3346     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3347     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3348     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3349     ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3350     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3351   }
3352 
3353   /* store matrix values */
3354   for (i=0; i<m; i++) {
3355     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3356     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3357   }
3358 
3359   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3360   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3361   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3362   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3363   PetscFunctionReturn(0);
3364 }
3365 
3366 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3367 {
3368   PetscErrorCode ierr;
3369   PetscBool      isbinary;
3370 
3371   PetscFunctionBegin;
3372   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3373   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3374   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3375   PetscFunctionReturn(0);
3376 }
3377 
3378 /*@
3379    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3380 
3381    Input Parameters:
3382 +  mat  - the matrix
3383 -  fact - factor
3384 
3385    Not Collective, each process can use a different factor
3386 
3387    Level: advanced
3388 
3389   Notes:
3390    This can also be set by the command line option: -mat_use_hash_table <fact>
3391 
3392 .seealso: MatSetOption()
3393 @*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed as
     "MatSetHashTableFactor_C" (MatSetHashTableFactor_MPIBAIJ for MPIBAIJ);
     PetscTryMethod() is a no-op when the method is not composed on mat. */
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3402 
3403 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3404 {
3405   Mat_MPIBAIJ *baij;
3406 
3407   PetscFunctionBegin;
3408   baij          = (Mat_MPIBAIJ*)mat->data;
3409   baij->ht_fact = fact;
3410   PetscFunctionReturn(0);
3411 }
3412 
3413 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3414 {
3415   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3416   PetscBool      flg;
3417   PetscErrorCode ierr;
3418 
3419   PetscFunctionBegin;
3420   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3421   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3422   if (Ad)     *Ad     = a->A;
3423   if (Ao)     *Ao     = a->B;
3424   if (colmap) *colmap = a->garray;
3425   PetscFunctionReturn(0);
3426 }
3427 
3428 /*
3429     Special version for direct calls from Fortran (to eliminate two function call overheads
3430 */
3431 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3432 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3433 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3434 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3435 #endif
3436 
3437 /*@C
3438   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3439 
3440   Collective on Mat
3441 
3442   Input Parameters:
3443 + mat - the matrix
3444 . min - number of input rows
3445 . im - input rows
3446 . nin - number of input columns
3447 . in - input columns
3448 . v - numerical values input
3449 - addvin - INSERT_VALUES or ADD_VALUES
3450 
3451   Notes:
3452     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3453 
3454   Level: advanced
3455 
3456 .seealso:   MatSetValuesBlocked()
3457 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;    /* scratch buffer holding one bs x bs block */
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);


  /* lazily allocate the bs2-sized scratch block used when a strided copy is required */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block-row indices are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row owned by this process */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required: the block is already
           contiguous in v, so point barray straight into the input array */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required: gather the strided block into barray */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          /* rewind barray to the start of the copied block */
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal portion -> insert into baij->A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
        else {
          /* off-diagonal portion: translate the global block column to the
             local column of baij->B via the colmap (built on demand) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* colmap miss on a matrix that disallows new nonzeros is fatal;
               otherwise disassemble so the entry can be inserted globally */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned elsewhere: stash it for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3569 
3570 /*@
3571      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3572          CSR format the local rows.
3573 
3574    Collective
3575 
3576    Input Parameters:
3577 +  comm - MPI communicator
3578 .  bs - the block size, only a block size of 1 is supported
3579 .  m - number of local rows (Cannot be PETSC_DECIDE)
3580 .  n - This value should be the same as the local size used in creating the
3581        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3582        calculated if N is given) For square matrices n is almost always m.
3583 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3584 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3585 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3586 .   j - column indices
3587 -   a - matrix values
3588 
3589    Output Parameter:
3590 .   mat - the matrix
3591 
3592    Level: intermediate
3593 
3594    Notes:
3595        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3596      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3597      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3598 
3599      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3600      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3601      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3602      with column-major ordering within blocks.
3603 
3604        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3605 
3606 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3607           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3608 @*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* i[] is a CSR row-offset array, so a valid input must start at 0 */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  /* the blocks in a[] are column-major (see the manual page above), so switch
     to column-oriented insertion while the CSR data is ingested, then restore
     the default row-oriented mode */
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3626 
3627 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3628 {
3629   PetscErrorCode ierr;
3630   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3631   PetscInt       *indx;
3632   PetscScalar    *values;
3633 
3634   PetscFunctionBegin;
3635   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3636   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3637     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3638     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3639     PetscInt       *bindx,rmax=a->rmax,j;
3640     PetscMPIInt    rank,size;
3641 
3642     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3643     mbs = m/bs; Nbs = N/cbs;
3644     if (n == PETSC_DECIDE) {
3645       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3646     }
3647     nbs = n/cbs;
3648 
3649     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3650     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3651 
3652     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3653     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3654     if (rank == size-1) {
3655       /* Check sum(nbs) = Nbs */
3656       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3657     }
3658 
3659     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3660     for (i=0; i<mbs; i++) {
3661       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3662       nnz = nnz/bs;
3663       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3664       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3665       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3666     }
3667     ierr = PetscFree(bindx);CHKERRQ(ierr);
3668 
3669     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3670     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3671     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3672     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3673     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3674     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3675     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3676   }
3677 
3678   /* numeric phase */
3679   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3680   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3681 
3682   for (i=0; i<m; i++) {
3683     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3684     Ii   = i + rstart;
3685     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3686     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3687   }
3688   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3689   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3690   PetscFunctionReturn(0);
3691 }
3692