xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision f5ff9c666c0d37e8a7ec3aa2f8e2aa9e44449bdb)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode    ierr;
15   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
16   PetscScalar       *va,*vv;
17   Vec               vB,vA;
18   const PetscScalar *vb;
19 
20   PetscFunctionBegin;
21   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
22   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
23 
24   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
25   if (idx) {
26     for (i=0; i<m; i++) {
27       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
28     }
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
32   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
33   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
34 
35   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
36   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
37   for (i=0; i<m; i++) {
38     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
39       vv[i] = vb[i];
40       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
41     } else {
42       vv[i] = va[i];
43       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
44         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
45     }
46   }
47   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
48   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
49   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
50   ierr = PetscFree(idxb);CHKERRQ(ierr);
51   ierr = VecDestroy(&vA);CHKERRQ(ierr);
52   ierr = VecDestroy(&vB);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
57 {
58   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
63   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79      Local utility routine that creates a mapping from the global column
80    number to the local number in the off-diagonal part of the local
81    storage of the matrix.  This is done in a non scalable way since the
82    length of colmap equals the global matrix length.
83 */
84 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
85 {
86   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
87   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
88   PetscErrorCode ierr;
89   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
90 
91   PetscFunctionBegin;
92 #if defined(PETSC_USE_CTABLE)
93   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
94   for (i=0; i<nbs; i++) {
95     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
96   }
97 #else
98   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
99   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
100   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
101 #endif
102   PetscFunctionReturn(0);
103 }
104 
105 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
106   { \
107     brow = row/bs;  \
108     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
109     rmax = aimax[brow]; nrow = ailen[brow]; \
110     bcol = col/bs; \
111     ridx = row % bs; cidx = col % bs; \
112     low  = 0; high = nrow; \
113     while (high-low > 3) { \
114       t = (low+high)/2; \
115       if (rp[t] > bcol) high = t; \
116       else              low  = t; \
117     } \
118     for (_i=low; _i<high; _i++) { \
119       if (rp[_i] > bcol) break; \
120       if (rp[_i] == bcol) { \
121         bap = ap +  bs2*_i + bs*cidx + ridx; \
122         if (addv == ADD_VALUES) *bap += value;  \
123         else                    *bap  = value;  \
124         goto a_noinsert; \
125       } \
126     } \
127     if (a->nonew == 1) goto a_noinsert; \
128     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
129     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
130     N = nrow++ - 1;  \
131     /* shift up all the later entries in this row */ \
132     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
133     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
134     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
135     rp[_i]                      = bcol;  \
136     ap[bs2*_i + bs*cidx + ridx] = value;  \
137 a_noinsert:; \
138     ailen[brow] = nrow; \
139   }
140 
141 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
142   { \
143     brow = row/bs;  \
144     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
145     rmax = bimax[brow]; nrow = bilen[brow]; \
146     bcol = col/bs; \
147     ridx = row % bs; cidx = col % bs; \
148     low  = 0; high = nrow; \
149     while (high-low > 3) { \
150       t = (low+high)/2; \
151       if (rp[t] > bcol) high = t; \
152       else              low  = t; \
153     } \
154     for (_i=low; _i<high; _i++) { \
155       if (rp[_i] > bcol) break; \
156       if (rp[_i] == bcol) { \
157         bap = ap +  bs2*_i + bs*cidx + ridx; \
158         if (addv == ADD_VALUES) *bap += value;  \
159         else                    *bap  = value;  \
160         goto b_noinsert; \
161       } \
162     } \
163     if (b->nonew == 1) goto b_noinsert; \
164     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
165     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
166     N = nrow++ - 1;  \
167     /* shift up all the later entries in this row */ \
168     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
169     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
170     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
171     rp[_i]                      = bcol;  \
172     ap[bs2*_i + bs*cidx + ridx] = value;  \
173 b_noinsert:; \
174     bilen[brow] = nrow; \
175   }
176 
177 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
178 {
179   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
180   MatScalar      value;
181   PetscBool      roworiented = baij->roworiented;
182   PetscErrorCode ierr;
183   PetscInt       i,j,row,col;
184   PetscInt       rstart_orig=mat->rmap->rstart;
185   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
186   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
187 
188   /* Some Variables required in the macro */
189   Mat         A     = baij->A;
190   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
191   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
192   MatScalar   *aa   =a->a;
193 
194   Mat         B     = baij->B;
195   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
196   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
197   MatScalar   *ba   =b->a;
198 
199   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
200   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
201   MatScalar *ap,*bap;
202 
203   PetscFunctionBegin;
204   for (i=0; i<m; i++) {
205     if (im[i] < 0) continue;
206     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
207     if (im[i] >= rstart_orig && im[i] < rend_orig) {
208       row = im[i] - rstart_orig;
209       for (j=0; j<n; j++) {
210         if (in[j] >= cstart_orig && in[j] < cend_orig) {
211           col = in[j] - cstart_orig;
212           if (roworiented) value = v[i*n+j];
213           else             value = v[i+j*m];
214           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
215         } else if (in[j] < 0) continue;
216         else if (PetscUnlikely(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
217         else {
218           if (mat->was_assembled) {
219             if (!baij->colmap) {
220               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
221             }
222 #if defined(PETSC_USE_CTABLE)
223             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
224             col  = col - 1;
225 #else
226             col = baij->colmap[in[j]/bs] - 1;
227 #endif
228             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
229               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
230               col  =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
232               B    = baij->B;
233               b    = (Mat_SeqBAIJ*)(B)->data;
234               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
235               ba   =b->a;
236             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
237             else col += in[j]%bs;
238           } else col = in[j];
239           if (roworiented) value = v[i*n+j];
240           else             value = v[i+j*m];
241           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
242           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
243         }
244       }
245     } else {
246       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
247       if (!baij->donotstash) {
248         mat->assembled = PETSC_FALSE;
249         if (roworiented) {
250           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
251         } else {
252           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
253         }
254       }
255     }
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
261 {
262   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
263   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
264   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
265   PetscErrorCode    ierr;
266   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
267   PetscBool         roworiented=a->roworiented;
268   const PetscScalar *value     = v;
269   MatScalar         *ap,*aa = a->a,*bap;
270 
271   PetscFunctionBegin;
272   rp   = aj + ai[row];
273   ap   = aa + bs2*ai[row];
274   rmax = imax[row];
275   nrow = ailen[row];
276   value = v;
277   low = 0;
278   high = nrow;
279   while (high-low > 7) {
280     t = (low+high)/2;
281     if (rp[t] > col) high = t;
282     else             low  = t;
283   }
284   for (i=low; i<high; i++) {
285     if (rp[i] > col) break;
286     if (rp[i] == col) {
287       bap = ap +  bs2*i;
288       if (roworiented) {
289         if (is == ADD_VALUES) {
290           for (ii=0; ii<bs; ii++) {
291             for (jj=ii; jj<bs2; jj+=bs) {
292               bap[jj] += *value++;
293             }
294           }
295         } else {
296           for (ii=0; ii<bs; ii++) {
297             for (jj=ii; jj<bs2; jj+=bs) {
298               bap[jj] = *value++;
299             }
300           }
301         }
302       } else {
303         if (is == ADD_VALUES) {
304           for (ii=0; ii<bs; ii++,value+=bs) {
305             for (jj=0; jj<bs; jj++) {
306               bap[jj] += value[jj];
307             }
308             bap += bs;
309           }
310         } else {
311           for (ii=0; ii<bs; ii++,value+=bs) {
312             for (jj=0; jj<bs; jj++) {
313               bap[jj]  = value[jj];
314             }
315             bap += bs;
316           }
317         }
318       }
319       goto noinsert2;
320     }
321   }
322   if (nonew == 1) goto noinsert2;
323   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
324   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
325   N = nrow++ - 1; high++;
326   /* shift up all the later entries in this row */
327   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
328   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
329   rp[i] = col;
330   bap   = ap +  bs2*i;
331   if (roworiented) {
332     for (ii=0; ii<bs; ii++) {
333       for (jj=ii; jj<bs2; jj+=bs) {
334         bap[jj] = *value++;
335       }
336     }
337   } else {
338     for (ii=0; ii<bs; ii++) {
339       for (jj=0; jj<bs; jj++) {
340         *bap++ = *value++;
341       }
342     }
343   }
344   noinsert2:;
345   ailen[row] = nrow;
346   PetscFunctionReturn(0);
347 }
348 
349 /*
350     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
351     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
352 */
353 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
354 {
355   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
356   const PetscScalar *value;
357   MatScalar         *barray     = baij->barray;
358   PetscBool         roworiented = baij->roworiented;
359   PetscErrorCode    ierr;
360   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
361   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
362   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
363 
364   PetscFunctionBegin;
365   if (!barray) {
366     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
367     baij->barray = barray;
368   }
369 
370   if (roworiented) stepval = (n-1)*bs;
371   else stepval = (m-1)*bs;
372 
373   for (i=0; i<m; i++) {
374     if (im[i] < 0) continue;
375     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
376     if (im[i] >= rstart && im[i] < rend) {
377       row = im[i] - rstart;
378       for (j=0; j<n; j++) {
379         /* If NumCol = 1 then a copy is not required */
380         if ((roworiented) && (n == 1)) {
381           barray = (MatScalar*)v + i*bs2;
382         } else if ((!roworiented) && (m == 1)) {
383           barray = (MatScalar*)v + j*bs2;
384         } else { /* Here a copy is required */
385           if (roworiented) {
386             value = v + (i*(stepval+bs) + j)*bs;
387           } else {
388             value = v + (j*(stepval+bs) + i)*bs;
389           }
390           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
391             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
392             barray += bs;
393           }
394           barray -= bs2;
395         }
396 
397         if (in[j] >= cstart && in[j] < cend) {
398           col  = in[j] - cstart;
399           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
400         } else if (in[j] < 0) continue;
401         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
402         else {
403           if (mat->was_assembled) {
404             if (!baij->colmap) {
405               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
406             }
407 
408 #if defined(PETSC_USE_DEBUG)
409 #if defined(PETSC_USE_CTABLE)
410             { PetscInt data;
411               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
412               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
413             }
414 #else
415             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
416 #endif
417 #endif
418 #if defined(PETSC_USE_CTABLE)
419             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
420             col  = (col - 1)/bs;
421 #else
422             col = (baij->colmap[in[j]] - 1)/bs;
423 #endif
424             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
425               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
426               col  =  in[j];
427             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
428           } else col = in[j];
429           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
430         }
431       }
432     } else {
433       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
434       if (!baij->donotstash) {
435         if (roworiented) {
436           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
437         } else {
438           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
439         }
440       }
441     }
442   }
443   PetscFunctionReturn(0);
444 }
445 
446 #define HASH_KEY 0.6180339887
447 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
448 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
449 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
450 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
451 {
452   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
453   PetscBool      roworiented = baij->roworiented;
454   PetscErrorCode ierr;
455   PetscInt       i,j,row,col;
456   PetscInt       rstart_orig=mat->rmap->rstart;
457   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
458   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
459   PetscReal      tmp;
460   MatScalar      **HD = baij->hd,value;
461   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
462 
463   PetscFunctionBegin;
464   for (i=0; i<m; i++) {
465     if (PetscDefined(USE_DEBUG)) {
466       if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
467       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
468     }
469     row = im[i];
470     if (row >= rstart_orig && row < rend_orig) {
471       for (j=0; j<n; j++) {
472         col = in[j];
473         if (roworiented) value = v[i*n+j];
474         else             value = v[i+j*m];
475         /* Look up PetscInto the Hash Table */
476         key = (row/bs)*Nbs+(col/bs)+1;
477         h1  = HASH(size,key,tmp);
478 
479 
480         idx = h1;
481         if (PetscDefined(USE_DEBUG)) {
482           insert_ct++;
483           total_ct++;
484           if (HT[idx] != key) {
485             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
486             if (idx == size) {
487               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
488               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
489             }
490           }
491         } else if (HT[idx] != key) {
492           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
493           if (idx == size) {
494             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
495             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
496           }
497         }
498         /* A HASH table entry is found, so insert the values at the correct address */
499         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
500         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
501       }
502     } else if (!baij->donotstash) {
503       if (roworiented) {
504         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
505       } else {
506         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
507       }
508     }
509   }
510   if (PetscDefined(USE_DEBUG)) {
511     baij->ht_total_ct  += total_ct;
512     baij->ht_insert_ct += insert_ct;
513   }
514   PetscFunctionReturn(0);
515 }
516 
517 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
518 {
519   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
520   PetscBool         roworiented = baij->roworiented;
521   PetscErrorCode    ierr;
522   PetscInt          i,j,ii,jj,row,col;
523   PetscInt          rstart=baij->rstartbs;
524   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
525   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
526   PetscReal         tmp;
527   MatScalar         **HD = baij->hd,*baij_a;
528   const PetscScalar *v_t,*value;
529   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
530 
531   PetscFunctionBegin;
532   if (roworiented) stepval = (n-1)*bs;
533   else stepval = (m-1)*bs;
534 
535   for (i=0; i<m; i++) {
536     if (PetscDefined(USE_DEBUG)) {
537       if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
538       if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
539     }
540     row = im[i];
541     v_t = v + i*nbs2;
542     if (row >= rstart && row < rend) {
543       for (j=0; j<n; j++) {
544         col = in[j];
545 
546         /* Look up into the Hash Table */
547         key = row*Nbs+col+1;
548         h1  = HASH(size,key,tmp);
549 
550         idx = h1;
551         if (PetscDefined(USE_DEBUG)) {
552           total_ct++;
553           insert_ct++;
554           if (HT[idx] != key) {
555             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
556             if (idx == size) {
557               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
558               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
559             }
560           }
561         } else if (HT[idx] != key) {
562           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
563           if (idx == size) {
564             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
565             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
566           }
567         }
568         baij_a = HD[idx];
569         if (roworiented) {
570           /*value = v + i*(stepval+bs)*bs + j*bs;*/
571           /* value = v + (i*(stepval+bs)+j)*bs; */
572           value = v_t;
573           v_t  += bs;
574           if (addv == ADD_VALUES) {
575             for (ii=0; ii<bs; ii++,value+=stepval) {
576               for (jj=ii; jj<bs2; jj+=bs) {
577                 baij_a[jj] += *value++;
578               }
579             }
580           } else {
581             for (ii=0; ii<bs; ii++,value+=stepval) {
582               for (jj=ii; jj<bs2; jj+=bs) {
583                 baij_a[jj] = *value++;
584               }
585             }
586           }
587         } else {
588           value = v + j*(stepval+bs)*bs + i*bs;
589           if (addv == ADD_VALUES) {
590             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
591               for (jj=0; jj<bs; jj++) {
592                 baij_a[jj] += *value++;
593               }
594             }
595           } else {
596             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
597               for (jj=0; jj<bs; jj++) {
598                 baij_a[jj] = *value++;
599               }
600             }
601           }
602         }
603       }
604     } else {
605       if (!baij->donotstash) {
606         if (roworiented) {
607           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
608         } else {
609           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
610         }
611       }
612     }
613   }
614   if (PetscDefined(USE_DEBUG)) {
615     baij->ht_total_ct  += total_ct;
616     baij->ht_insert_ct += insert_ct;
617   }
618   PetscFunctionReturn(0);
619 }
620 
621 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
622 {
623   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
624   PetscErrorCode ierr;
625   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
626   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
627 
628   PetscFunctionBegin;
629   for (i=0; i<m; i++) {
630     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
631     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
632     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
633       row = idxm[i] - bsrstart;
634       for (j=0; j<n; j++) {
635         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
636         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
637         if (idxn[j] >= bscstart && idxn[j] < bscend) {
638           col  = idxn[j] - bscstart;
639           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
640         } else {
641           if (!baij->colmap) {
642             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
643           }
644 #if defined(PETSC_USE_CTABLE)
645           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
646           data--;
647 #else
648           data = baij->colmap[idxn[j]/bs]-1;
649 #endif
650           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
651           else {
652             col  = data + idxn[j]%bs;
653             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
654           }
655         }
656       }
657     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
658   }
659   PetscFunctionReturn(0);
660 }
661 
662 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
663 {
664   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
665   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
666   PetscErrorCode ierr;
667   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
668   PetscReal      sum = 0.0;
669   MatScalar      *v;
670 
671   PetscFunctionBegin;
672   if (baij->size == 1) {
673     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
674   } else {
675     if (type == NORM_FROBENIUS) {
676       v  = amat->a;
677       nz = amat->nz*bs2;
678       for (i=0; i<nz; i++) {
679         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
680       }
681       v  = bmat->a;
682       nz = bmat->nz*bs2;
683       for (i=0; i<nz; i++) {
684         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
685       }
686       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
687       *nrm = PetscSqrtReal(*nrm);
688     } else if (type == NORM_1) { /* max column sum */
689       PetscReal *tmp,*tmp2;
690       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
691       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
692       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
693       v    = amat->a; jj = amat->j;
694       for (i=0; i<amat->nz; i++) {
695         for (j=0; j<bs; j++) {
696           col = bs*(cstart + *jj) + j; /* column index */
697           for (row=0; row<bs; row++) {
698             tmp[col] += PetscAbsScalar(*v);  v++;
699           }
700         }
701         jj++;
702       }
703       v = bmat->a; jj = bmat->j;
704       for (i=0; i<bmat->nz; i++) {
705         for (j=0; j<bs; j++) {
706           col = bs*garray[*jj] + j;
707           for (row=0; row<bs; row++) {
708             tmp[col] += PetscAbsScalar(*v); v++;
709           }
710         }
711         jj++;
712       }
713       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714       *nrm = 0.0;
715       for (j=0; j<mat->cmap->N; j++) {
716         if (tmp2[j] > *nrm) *nrm = tmp2[j];
717       }
718       ierr = PetscFree(tmp);CHKERRQ(ierr);
719       ierr = PetscFree(tmp2);CHKERRQ(ierr);
720     } else if (type == NORM_INFINITY) { /* max row sum */
721       PetscReal *sums;
722       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
723       sum  = 0.0;
724       for (j=0; j<amat->mbs; j++) {
725         for (row=0; row<bs; row++) sums[row] = 0.0;
726         v  = amat->a + bs2*amat->i[j];
727         nz = amat->i[j+1]-amat->i[j];
728         for (i=0; i<nz; i++) {
729           for (col=0; col<bs; col++) {
730             for (row=0; row<bs; row++) {
731               sums[row] += PetscAbsScalar(*v); v++;
732             }
733           }
734         }
735         v  = bmat->a + bs2*bmat->i[j];
736         nz = bmat->i[j+1]-bmat->i[j];
737         for (i=0; i<nz; i++) {
738           for (col=0; col<bs; col++) {
739             for (row=0; row<bs; row++) {
740               sums[row] += PetscAbsScalar(*v); v++;
741             }
742           }
743         }
744         for (row=0; row<bs; row++) {
745           if (sums[row] > sum) sum = sums[row];
746         }
747       }
748       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
749       ierr = PetscFree(sums);CHKERRQ(ierr);
750     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 /*
756   Creates the hash table, and sets the table
757   This table is created only once.
758   If new entried need to be added to the matrix
759   then the hash table has to be destroyed and
760   recreated.
761 */
762 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
763 {
764   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
765   Mat            A     = baij->A,B=baij->B;
766   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
767   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
768   PetscErrorCode ierr;
769   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
770   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
771   PetscInt       *HT,key;
772   MatScalar      **HD;
773   PetscReal      tmp;
774 #if defined(PETSC_USE_INFO)
775   PetscInt ct=0,max=0;
776 #endif
777 
778   PetscFunctionBegin;
779   if (baij->ht) PetscFunctionReturn(0);
780 
781   baij->ht_size = (PetscInt)(factor*nz);
782   ht_size       = baij->ht_size;
783 
784   /* Allocate Memory for Hash Table */
785   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
786   HD   = baij->hd;
787   HT   = baij->ht;
788 
789   /* Loop Over A */
790   for (i=0; i<a->mbs; i++) {
791     for (j=ai[i]; j<ai[i+1]; j++) {
792       row = i+rstart;
793       col = aj[j]+cstart;
794 
795       key = row*Nbs + col + 1;
796       h1  = HASH(ht_size,key,tmp);
797       for (k=0; k<ht_size; k++) {
798         if (!HT[(h1+k)%ht_size]) {
799           HT[(h1+k)%ht_size] = key;
800           HD[(h1+k)%ht_size] = a->a + j*bs2;
801           break;
802 #if defined(PETSC_USE_INFO)
803         } else {
804           ct++;
805 #endif
806         }
807       }
808 #if defined(PETSC_USE_INFO)
809       if (k> max) max = k;
810 #endif
811     }
812   }
813   /* Loop Over B */
814   for (i=0; i<b->mbs; i++) {
815     for (j=bi[i]; j<bi[i+1]; j++) {
816       row = i+rstart;
817       col = garray[bj[j]];
818       key = row*Nbs + col + 1;
819       h1  = HASH(ht_size,key,tmp);
820       for (k=0; k<ht_size; k++) {
821         if (!HT[(h1+k)%ht_size]) {
822           HT[(h1+k)%ht_size] = key;
823           HD[(h1+k)%ht_size] = b->a + j*bs2;
824           break;
825 #if defined(PETSC_USE_INFO)
826         } else {
827           ct++;
828 #endif
829         }
830       }
831 #if defined(PETSC_USE_INFO)
832       if (k> max) max = k;
833 #endif
834     }
835   }
836 
837   /* Print Summary */
838 #if defined(PETSC_USE_INFO)
839   for (i=0,j=0; i<ht_size; i++) {
840     if (HT[i]) j++;
841   }
842   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
843 #endif
844   PetscFunctionReturn(0);
845 }
846 
847 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
848 {
849   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
850   PetscErrorCode ierr;
851   PetscInt       nstash,reallocs;
852 
853   PetscFunctionBegin;
854   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
855 
856   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
857   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
858   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
859   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
860   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
861   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
866 {
867   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
868   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
869   PetscErrorCode ierr;
870   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
871   PetscInt       *row,*col;
872   PetscBool      r1,r2,r3,other_disassembled;
873   MatScalar      *val;
874   PetscMPIInt    n;
875 
876   PetscFunctionBegin;
877   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
878   if (!baij->donotstash && !mat->nooffprocentries) {
879     while (1) {
880       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
881       if (!flg) break;
882 
883       for (i=0; i<n;) {
884         /* Now identify the consecutive vals belonging to the same row */
885         for (j=i,rstart=row[j]; j<n; j++) {
886           if (row[j] != rstart) break;
887         }
888         if (j < n) ncols = j-i;
889         else       ncols = n-i;
890         /* Now assemble all these values with a single function call */
891         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
892         i    = j;
893       }
894     }
895     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
896     /* Now process the block-stash. Since the values are stashed column-oriented,
897        set the roworiented flag to column oriented, and after MatSetValues()
898        restore the original flags */
899     r1 = baij->roworiented;
900     r2 = a->roworiented;
901     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
902 
903     baij->roworiented = PETSC_FALSE;
904     a->roworiented    = PETSC_FALSE;
905 
906     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
907     while (1) {
908       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
909       if (!flg) break;
910 
911       for (i=0; i<n;) {
912         /* Now identify the consecutive vals belonging to the same row */
913         for (j=i,rstart=row[j]; j<n; j++) {
914           if (row[j] != rstart) break;
915         }
916         if (j < n) ncols = j-i;
917         else       ncols = n-i;
918         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
919         i    = j;
920       }
921     }
922     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
923 
924     baij->roworiented = r1;
925     a->roworiented    = r2;
926 
927     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
928   }
929 
930   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
931   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
932 
933   /* determine if any processor has disassembled, if so we must
934      also disassemble ourselfs, in order that we may reassemble. */
935   /*
936      if nonzero structure of submatrix B cannot change then we know that
937      no processor disassembled thus we can skip this stuff
938   */
939   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
940     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
941     if (mat->was_assembled && !other_disassembled) {
942       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
943     }
944   }
945 
946   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
947     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
948   }
949   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
950   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
951 
952 #if defined(PETSC_USE_INFO)
953   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
954     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
955 
956     baij->ht_total_ct  = 0;
957     baij->ht_insert_ct = 0;
958   }
959 #endif
960   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
961     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
962 
963     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
964     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
965   }
966 
967   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
968 
969   baij->rowvalues = NULL;
970 
971   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
972   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
973     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
974     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
980 #include <petscdraw.h>
981 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
982 {
983   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
984   PetscErrorCode    ierr;
985   PetscMPIInt       rank = baij->rank;
986   PetscInt          bs   = mat->rmap->bs;
987   PetscBool         iascii,isdraw;
988   PetscViewer       sviewer;
989   PetscViewerFormat format;
990 
991   PetscFunctionBegin;
992   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
993   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
994   if (iascii) {
995     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
996     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
997       MatInfo info;
998       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
999       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1000       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1001       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
1002                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
1003       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1004       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1005       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1006       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1007       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1008       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1009       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1010       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1011       PetscFunctionReturn(0);
1012     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1013       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1014       PetscFunctionReturn(0);
1015     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1016       PetscFunctionReturn(0);
1017     }
1018   }
1019 
1020   if (isdraw) {
1021     PetscDraw draw;
1022     PetscBool isnull;
1023     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1024     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1025     if (isnull) PetscFunctionReturn(0);
1026   }
1027 
1028   {
1029     /* assemble the entire matrix onto first processor. */
1030     Mat         A;
1031     Mat_SeqBAIJ *Aloc;
1032     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1033     MatScalar   *a;
1034     const char  *matname;
1035 
1036     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1037     /* Perhaps this should be the type of mat? */
1038     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1039     if (!rank) {
1040       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1041     } else {
1042       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1043     }
1044     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1045     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1046     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1047     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1048 
1049     /* copy over the A part */
1050     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1051     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1052     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1053 
1054     for (i=0; i<mbs; i++) {
1055       rvals[0] = bs*(baij->rstartbs + i);
1056       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1057       for (j=ai[i]; j<ai[i+1]; j++) {
1058         col = (baij->cstartbs+aj[j])*bs;
1059         for (k=0; k<bs; k++) {
1060           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1061           col++; a += bs;
1062         }
1063       }
1064     }
1065     /* copy over the B part */
1066     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1067     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1068     for (i=0; i<mbs; i++) {
1069       rvals[0] = bs*(baij->rstartbs + i);
1070       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1071       for (j=ai[i]; j<ai[i+1]; j++) {
1072         col = baij->garray[aj[j]]*bs;
1073         for (k=0; k<bs; k++) {
1074           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1075           col++; a += bs;
1076         }
1077       }
1078     }
1079     ierr = PetscFree(rvals);CHKERRQ(ierr);
1080     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1081     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1082     /*
1083        Everyone has to call to draw the matrix since the graphics waits are
1084        synchronized across all processors that share the PetscDraw object
1085     */
1086     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1087     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1088     if (!rank) {
1089       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1090       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1091     }
1092     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1093     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1094     ierr = MatDestroy(&A);CHKERRQ(ierr);
1095   }
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1100 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1101 {
1102   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
1103   Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
1104   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
1105   const PetscInt *garray = aij->garray;
1106   PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1107   PetscInt       *rowlens,*colidxs;
1108   PetscScalar    *matvals;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1113 
1114   M  = mat->rmap->N;
1115   N  = mat->cmap->N;
1116   m  = mat->rmap->n;
1117   rs = mat->rmap->rstart;
1118   cs = mat->cmap->rstart;
1119   bs = mat->rmap->bs;
1120   nz = bs*bs*(A->nz + B->nz);
1121 
1122   /* write matrix header */
1123   header[0] = MAT_FILE_CLASSID;
1124   header[1] = M; header[2] = N; header[3] = nz;
1125   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1126   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1127 
1128   /* fill in and store row lengths */
1129   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1130   for (cnt=0, i=0; i<A->mbs; i++)
1131     for (j=0; j<bs; j++)
1132       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1133   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1134   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1135 
1136   /* fill in and store column indices */
1137   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1138   for (cnt=0, i=0; i<A->mbs; i++) {
1139     for (k=0; k<bs; k++) {
1140       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1141         if (garray[B->j[jb]] > cs/bs) break;
1142         for (l=0; l<bs; l++)
1143           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1144       }
1145       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1146         for (l=0; l<bs; l++)
1147           colidxs[cnt++] = bs*A->j[ja] + l + cs;
1148       for (; jb<B->i[i+1]; jb++)
1149         for (l=0; l<bs; l++)
1150           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1151     }
1152   }
1153   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1154   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
1155   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1156 
1157   /* fill in and store nonzero values */
1158   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1159   for (cnt=0, i=0; i<A->mbs; i++) {
1160     for (k=0; k<bs; k++) {
1161       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1162         if (garray[B->j[jb]] > cs/bs) break;
1163         for (l=0; l<bs; l++)
1164           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1165       }
1166       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1167         for (l=0; l<bs; l++)
1168           matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1169       for (; jb<B->i[i+1]; jb++)
1170         for (l=0; l<bs; l++)
1171           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1172     }
1173   }
1174   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
1175   ierr = PetscFree(matvals);CHKERRQ(ierr);
1176 
1177   /* write block size option to the viewer's .info file */
1178   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1183 {
1184   PetscErrorCode ierr;
1185   PetscBool      iascii,isdraw,issocket,isbinary;
1186 
1187   PetscFunctionBegin;
1188   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1189   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1190   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1191   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1192   if (iascii || isdraw || issocket) {
1193     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1194   } else if (isbinary) {
1195     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1196   }
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1201 {
1202   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206 #if defined(PETSC_USE_LOG)
1207   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1208 #endif
1209   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1210   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1211   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1212   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1213 #if defined(PETSC_USE_CTABLE)
1214   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1215 #else
1216   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1217 #endif
1218   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1219   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1220   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1221   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1222   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1223   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1224   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1225   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1226 
1227   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1236 #if defined(PETSC_HAVE_HYPRE)
1237   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
1238 #endif
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1244 {
1245   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1246   PetscErrorCode ierr;
1247   PetscInt       nt;
1248 
1249   PetscFunctionBegin;
1250   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1251   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1252   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1253   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1254   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1255   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1256   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1257   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1262 {
1263   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1264   PetscErrorCode ierr;
1265 
1266   PetscFunctionBegin;
1267   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1269   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1275 {
1276   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1277   PetscErrorCode ierr;
1278 
1279   PetscFunctionBegin;
1280   /* do nondiagonal part */
1281   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1282   /* do local part */
1283   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1284   /* add partial results together */
1285   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1286   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1291 {
1292   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1293   PetscErrorCode ierr;
1294 
1295   PetscFunctionBegin;
1296   /* do nondiagonal part */
1297   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1298   /* do local part */
1299   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1300   /* add partial results together */
1301   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1302   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 /*
1307   This only works correctly for square matrices where the subblock A->A is the
1308    diagonal block
1309 */
1310 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1311 {
1312   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1317   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1322 {
1323   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325 
1326   PetscFunctionBegin;
1327   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1328   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333 {
1334   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1335   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1336   PetscErrorCode ierr;
1337   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1338   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1339   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1340 
1341   PetscFunctionBegin;
1342   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1343   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1344   mat->getrowactive = PETSC_TRUE;
1345 
1346   if (!mat->rowvalues && (idx || v)) {
1347     /*
1348         allocate enough space to hold information from the longest row.
1349     */
1350     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1351     PetscInt    max = 1,mbs = mat->mbs,tmp;
1352     for (i=0; i<mbs; i++) {
1353       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1354       if (max < tmp) max = tmp;
1355     }
1356     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1357   }
1358   lrow = row - brstart;
1359 
1360   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1361   if (!v)   {pvA = NULL; pvB = NULL;}
1362   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1363   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1364   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1365   nztot = nzA + nzB;
1366 
1367   cmap = mat->garray;
1368   if (v  || idx) {
1369     if (nztot) {
1370       /* Sort by increasing column numbers, assuming A and B already sorted */
1371       PetscInt imark = -1;
1372       if (v) {
1373         *v = v_p = mat->rowvalues;
1374         for (i=0; i<nzB; i++) {
1375           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1376           else break;
1377         }
1378         imark = i;
1379         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1380         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1381       }
1382       if (idx) {
1383         *idx = idx_p = mat->rowindices;
1384         if (imark > -1) {
1385           for (i=0; i<imark; i++) {
1386             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1387           }
1388         } else {
1389           for (i=0; i<nzB; i++) {
1390             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1391             else break;
1392           }
1393           imark = i;
1394         }
1395         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1396         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1397       }
1398     } else {
1399       if (idx) *idx = NULL;
1400       if (v)   *v   = NULL;
1401     }
1402   }
1403   *nz  = nztot;
1404   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1405   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1410 {
1411   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1412 
1413   PetscFunctionBegin;
1414   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1415   baij->getrowactive = PETSC_FALSE;
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1420 {
1421   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1422   PetscErrorCode ierr;
1423 
1424   PetscFunctionBegin;
1425   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1426   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1431 {
1432   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1433   Mat            A  = a->A,B = a->B;
1434   PetscErrorCode ierr;
1435   PetscLogDouble isend[5],irecv[5];
1436 
1437   PetscFunctionBegin;
1438   info->block_size = (PetscReal)matin->rmap->bs;
1439 
1440   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1441 
1442   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1443   isend[3] = info->memory;  isend[4] = info->mallocs;
1444 
1445   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1446 
1447   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1448   isend[3] += info->memory;  isend[4] += info->mallocs;
1449 
1450   if (flag == MAT_LOCAL) {
1451     info->nz_used      = isend[0];
1452     info->nz_allocated = isend[1];
1453     info->nz_unneeded  = isend[2];
1454     info->memory       = isend[3];
1455     info->mallocs      = isend[4];
1456   } else if (flag == MAT_GLOBAL_MAX) {
1457     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1458 
1459     info->nz_used      = irecv[0];
1460     info->nz_allocated = irecv[1];
1461     info->nz_unneeded  = irecv[2];
1462     info->memory       = irecv[3];
1463     info->mallocs      = irecv[4];
1464   } else if (flag == MAT_GLOBAL_SUM) {
1465     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1466 
1467     info->nz_used      = irecv[0];
1468     info->nz_allocated = irecv[1];
1469     info->nz_unneeded  = irecv[2];
1470     info->memory       = irecv[3];
1471     info->mallocs      = irecv[4];
1472   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1473   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1474   info->fill_ratio_needed = 0;
1475   info->factor_mallocs    = 0;
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1480 {
1481   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1482   PetscErrorCode ierr;
1483 
1484   PetscFunctionBegin;
1485   switch (op) {
1486   case MAT_NEW_NONZERO_LOCATIONS:
1487   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1488   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1489   case MAT_KEEP_NONZERO_PATTERN:
1490   case MAT_NEW_NONZERO_LOCATION_ERR:
1491     MatCheckPreallocated(A,1);
1492     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1493     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1494     break;
1495   case MAT_ROW_ORIENTED:
1496     MatCheckPreallocated(A,1);
1497     a->roworiented = flg;
1498 
1499     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1500     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1501     break;
1502   case MAT_FORCE_DIAGONAL_ENTRIES:
1503   case MAT_SORTED_FULL:
1504     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1505     break;
1506   case MAT_IGNORE_OFF_PROC_ENTRIES:
1507     a->donotstash = flg;
1508     break;
1509   case MAT_USE_HASH_TABLE:
1510     a->ht_flag = flg;
1511     a->ht_fact = 1.39;
1512     break;
1513   case MAT_SYMMETRIC:
1514   case MAT_STRUCTURALLY_SYMMETRIC:
1515   case MAT_HERMITIAN:
1516   case MAT_SUBMAT_SINGLEIS:
1517   case MAT_SYMMETRY_ETERNAL:
1518     MatCheckPreallocated(A,1);
1519     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1520     break;
1521   default:
1522     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1528 {
1529   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1530   Mat_SeqBAIJ    *Aloc;
1531   Mat            B;
1532   PetscErrorCode ierr;
1533   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1534   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1535   MatScalar      *a;
1536 
1537   PetscFunctionBegin;
1538   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1539     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1540     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1541     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1542     /* Do not know preallocation information, but must set block size */
1543     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1544   } else {
1545     B = *matout;
1546   }
1547 
1548   /* copy over the A part */
1549   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1550   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1551   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1552 
1553   for (i=0; i<mbs; i++) {
1554     rvals[0] = bs*(baij->rstartbs + i);
1555     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1556     for (j=ai[i]; j<ai[i+1]; j++) {
1557       col = (baij->cstartbs+aj[j])*bs;
1558       for (k=0; k<bs; k++) {
1559         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1560 
1561         col++; a += bs;
1562       }
1563     }
1564   }
1565   /* copy over the B part */
1566   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1567   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1568   for (i=0; i<mbs; i++) {
1569     rvals[0] = bs*(baij->rstartbs + i);
1570     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1571     for (j=ai[i]; j<ai[i+1]; j++) {
1572       col = baij->garray[aj[j]]*bs;
1573       for (k=0; k<bs; k++) {
1574         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1575         col++;
1576         a += bs;
1577       }
1578     }
1579   }
1580   ierr = PetscFree(rvals);CHKERRQ(ierr);
1581   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583 
1584   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1585   else {
1586     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1592 {
1593   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1594   Mat            a     = baij->A,b = baij->B;
1595   PetscErrorCode ierr;
1596   PetscInt       s1,s2,s3;
1597 
1598   PetscFunctionBegin;
1599   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1600   if (rr) {
1601     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1602     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1603     /* Overlap communication with computation. */
1604     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605   }
1606   if (ll) {
1607     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1608     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1609     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1610   }
1611   /* scale  the diagonal block */
1612   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1613 
1614   if (rr) {
1615     /* Do a scatter end and then right scale the off-diagonal block */
1616     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1617     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1618   }
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1623 {
1624   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1625   PetscInt      *lrows;
1626   PetscInt       r, len;
1627   PetscBool      cong;
1628   PetscErrorCode ierr;
1629 
1630   PetscFunctionBegin;
1631   /* get locally owned rows */
1632   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1633   /* fix right hand side if needed */
1634   if (x && b) {
1635     const PetscScalar *xx;
1636     PetscScalar       *bb;
1637 
1638     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1639     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1640     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1641     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1642     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1643   }
1644 
1645   /* actually zap the local rows */
1646   /*
1647         Zero the required rows. If the "diagonal block" of the matrix
1648      is square and the user wishes to set the diagonal we use separate
1649      code so that MatSetValues() is not called for each diagonal allocating
1650      new memory, thus calling lots of mallocs and slowing things down.
1651 
1652   */
1653   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1654   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1655   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1656   if ((diag != 0.0) && cong) {
1657     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1658   } else if (diag != 0.0) {
1659     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1660     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1661        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1662     for (r = 0; r < len; ++r) {
1663       const PetscInt row = lrows[r] + A->rmap->rstart;
1664       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1665     }
1666     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   } else {
1669     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1670   }
1671   ierr = PetscFree(lrows);CHKERRQ(ierr);
1672 
1673   /* only change matrix nonzero state if pattern was allowed to be changed */
1674   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1675     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1676     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1677   }
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1682 {
1683   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1684   PetscErrorCode    ierr;
1685   PetscMPIInt       n = A->rmap->n,p = 0;
1686   PetscInt          i,j,k,r,len = 0,row,col,count;
1687   PetscInt          *lrows,*owners = A->rmap->range;
1688   PetscSFNode       *rrows;
1689   PetscSF           sf;
1690   const PetscScalar *xx;
1691   PetscScalar       *bb,*mask;
1692   Vec               xmask,lmask;
1693   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1694   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1695   PetscScalar       *aa;
1696 
1697   PetscFunctionBegin;
1698   /* Create SF where leaves are input rows and roots are owned rows */
1699   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1700   for (r = 0; r < n; ++r) lrows[r] = -1;
1701   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1702   for (r = 0; r < N; ++r) {
1703     const PetscInt idx   = rows[r];
1704     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1705     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1706       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1707     }
1708     rrows[r].rank  = p;
1709     rrows[r].index = rows[r] - owners[p];
1710   }
1711   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1712   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1713   /* Collect flags for rows to be zeroed */
1714   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1715   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1716   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1717   /* Compress and put in row numbers */
1718   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1719   /* zero diagonal part of matrix */
1720   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1721   /* handle off diagonal part of matrix */
1722   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1723   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1724   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1725   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1726   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1727   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1729   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1730   if (x) {
1731     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1732     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1733     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1734     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1735   }
1736   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1737   /* remove zeroed rows of off diagonal matrix */
1738   for (i = 0; i < len; ++i) {
1739     row   = lrows[i];
1740     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1741     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1742     for (k = 0; k < count; ++k) {
1743       aa[0] = 0.0;
1744       aa   += bs;
1745     }
1746   }
1747   /* loop over all elements of off process part of matrix zeroing removed columns*/
1748   for (i = 0; i < l->B->rmap->N; ++i) {
1749     row = i/bs;
1750     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1751       for (k = 0; k < bs; ++k) {
1752         col = bs*baij->j[j] + k;
1753         if (PetscAbsScalar(mask[col])) {
1754           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1755           if (x) bb[i] -= aa[0]*xx[col];
1756           aa[0] = 0.0;
1757         }
1758       }
1759     }
1760   }
1761   if (x) {
1762     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1763     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1764   }
1765   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1766   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1767   ierr = PetscFree(lrows);CHKERRQ(ierr);
1768 
1769   /* only change matrix nonzero state if pattern was allowed to be changed */
1770   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1771     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1772     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1773   }
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1778 {
1779   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1780   PetscErrorCode ierr;
1781 
1782   PetscFunctionBegin;
1783   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1788 
1789 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1790 {
1791   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1792   Mat            a,b,c,d;
1793   PetscBool      flg;
1794   PetscErrorCode ierr;
1795 
1796   PetscFunctionBegin;
1797   a = matA->A; b = matA->B;
1798   c = matB->A; d = matB->B;
1799 
1800   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1801   if (flg) {
1802     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1803   }
1804   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1809 {
1810   PetscErrorCode ierr;
1811   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1812   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1813 
1814   PetscFunctionBegin;
1815   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1816   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1817     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1818   } else {
1819     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1820     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1821   }
1822   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1827 {
1828   PetscErrorCode ierr;
1829 
1830   PetscFunctionBegin;
1831   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1836 {
1837   PetscErrorCode ierr;
1838   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1839   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1840   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1841 
1842   PetscFunctionBegin;
1843   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1848 {
1849   PetscErrorCode ierr;
1850   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1851   PetscBLASInt   bnz,one=1;
1852   Mat_SeqBAIJ    *x,*y;
1853   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
1854 
1855   PetscFunctionBegin;
1856   if (str == SAME_NONZERO_PATTERN) {
1857     PetscScalar alpha = a;
1858     x    = (Mat_SeqBAIJ*)xx->A->data;
1859     y    = (Mat_SeqBAIJ*)yy->A->data;
1860     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1861     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1862     x    = (Mat_SeqBAIJ*)xx->B->data;
1863     y    = (Mat_SeqBAIJ*)yy->B->data;
1864     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1865     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1866     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1867   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1868     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1869   } else {
1870     Mat      B;
1871     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1872     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1873     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1874     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1875     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1876     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1877     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1878     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
1879     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1880     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1881     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1882     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1883     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1884     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1885     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1886     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1887   }
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1892 {
1893 #if defined(PETSC_USE_COMPLEX)
1894   PetscErrorCode ierr;
1895   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1896 
1897   PetscFunctionBegin;
1898   ierr = MatConjugate_SeqBAIJ(a->A);CHKERRQ(ierr);
1899   ierr = MatConjugate_SeqBAIJ(a->B);CHKERRQ(ierr);
1900 #else
1901   PetscFunctionBegin;
1902 #endif
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1907 {
1908   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1909   PetscErrorCode ierr;
1910 
1911   PetscFunctionBegin;
1912   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1913   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1918 {
1919   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1920   PetscErrorCode ierr;
1921 
1922   PetscFunctionBegin;
1923   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1924   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1929 {
1930   PetscErrorCode ierr;
1931   IS             iscol_local;
1932   PetscInt       csize;
1933 
1934   PetscFunctionBegin;
1935   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1936   if (call == MAT_REUSE_MATRIX) {
1937     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1938     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1939   } else {
1940     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1941   }
1942   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1943   if (call == MAT_INITIAL_MATRIX) {
1944     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1945     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1946   }
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 /*
1951   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1952   in local and then by concatenating the local matrices the end result.
1953   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1954   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1955 */
1956 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1957 {
1958   PetscErrorCode ierr;
1959   PetscMPIInt    rank,size;
1960   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1961   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1962   Mat            M,Mreuse;
1963   MatScalar      *vwork,*aa;
1964   MPI_Comm       comm;
1965   IS             isrow_new, iscol_new;
1966   Mat_SeqBAIJ    *aij;
1967 
1968   PetscFunctionBegin;
1969   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
1970   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
1971   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1972   /* The compression and expansion should be avoided. Doesn't point
1973      out errors, might change the indices, hence buggey */
1974   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
1975   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
1976 
1977   if (call ==  MAT_REUSE_MATRIX) {
1978     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1979     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1980     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
1981   } else {
1982     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
1983   }
1984   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
1985   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
1986   /*
1987       m - number of local rows
1988       n - number of columns (same on all processors)
1989       rstart - first row in new global matrix generated
1990   */
1991   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1992   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1993   m    = m/bs;
1994   n    = n/bs;
1995 
1996   if (call == MAT_INITIAL_MATRIX) {
1997     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1998     ii  = aij->i;
1999     jj  = aij->j;
2000 
2001     /*
2002         Determine the number of non-zeros in the diagonal and off-diagonal
2003         portions of the matrix in order to do correct preallocation
2004     */
2005 
2006     /* first get start and end of "diagonal" columns */
2007     if (csize == PETSC_DECIDE) {
2008       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2009       if (mglobal == n*bs) { /* square matrix */
2010         nlocal = m;
2011       } else {
2012         nlocal = n/size + ((n % size) > rank);
2013       }
2014     } else {
2015       nlocal = csize/bs;
2016     }
2017     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
2018     rstart = rend - nlocal;
2019     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2020 
2021     /* next, compute all the lengths */
2022     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2023     for (i=0; i<m; i++) {
2024       jend = ii[i+1] - ii[i];
2025       olen = 0;
2026       dlen = 0;
2027       for (j=0; j<jend; j++) {
2028         if (*jj < rstart || *jj >= rend) olen++;
2029         else dlen++;
2030         jj++;
2031       }
2032       olens[i] = olen;
2033       dlens[i] = dlen;
2034     }
2035     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2036     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2037     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2038     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2039     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2040     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2041   } else {
2042     PetscInt ml,nl;
2043 
2044     M    = *newmat;
2045     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2046     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2047     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2048     /*
2049          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2050        rather than the slower MatSetValues().
2051     */
2052     M->was_assembled = PETSC_TRUE;
2053     M->assembled     = PETSC_FALSE;
2054   }
2055   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2056   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2057   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2058   ii   = aij->i;
2059   jj   = aij->j;
2060   aa   = aij->a;
2061   for (i=0; i<m; i++) {
2062     row   = rstart/bs + i;
2063     nz    = ii[i+1] - ii[i];
2064     cwork = jj;     jj += nz;
2065     vwork = aa;     aa += nz*bs*bs;
2066     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2067   }
2068 
2069   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2071   *newmat = M;
2072 
2073   /* save submatrix used in processor for next request */
2074   if (call ==  MAT_INITIAL_MATRIX) {
2075     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2076     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2077   }
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2082 {
2083   MPI_Comm       comm,pcomm;
2084   PetscInt       clocal_size,nrows;
2085   const PetscInt *rows;
2086   PetscMPIInt    size;
2087   IS             crowp,lcolp;
2088   PetscErrorCode ierr;
2089 
2090   PetscFunctionBegin;
2091   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2092   /* make a collective version of 'rowp' */
2093   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2094   if (pcomm==comm) {
2095     crowp = rowp;
2096   } else {
2097     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2098     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2099     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2100     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2101   }
2102   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2103   /* make a local version of 'colp' */
2104   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2105   ierr = MPI_Comm_size(pcomm,&size);CHKERRMPI(ierr);
2106   if (size==1) {
2107     lcolp = colp;
2108   } else {
2109     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2110   }
2111   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2112   /* now we just get the submatrix */
2113   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2114   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2115   /* clean up */
2116   if (pcomm!=comm) {
2117     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2118   }
2119   if (size>1) {
2120     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2121   }
2122   PetscFunctionReturn(0);
2123 }
2124 
2125 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2126 {
2127   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2128   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2129 
2130   PetscFunctionBegin;
2131   if (nghosts) *nghosts = B->nbs;
2132   if (ghosts) *ghosts = baij->garray;
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2137 {
2138   Mat            B;
2139   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2140   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2141   Mat_SeqAIJ     *b;
2142   PetscErrorCode ierr;
2143   PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
2144   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2145   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2146 
2147   PetscFunctionBegin;
2148   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
2149   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
2150 
2151   /* ----------------------------------------------------------------
2152      Tell every processor the number of nonzeros per row
2153   */
2154   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2155   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2156     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2157   }
2158   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2159   displs    = recvcounts + size;
2160   for (i=0; i<size; i++) {
2161     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2162     displs[i]     = A->rmap->range[i]/bs;
2163   }
2164 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2165   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2166 #else
2167   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2168   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2169 #endif
2170   /* ---------------------------------------------------------------
2171      Create the sequential matrix of the same type as the local block diagonal
2172   */
2173   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2174   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2175   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2176   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2177   b    = (Mat_SeqAIJ*)B->data;
2178 
2179   /*--------------------------------------------------------------------
2180     Copy my part of matrix column indices over
2181   */
2182   sendcount  = ad->nz + bd->nz;
2183   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2184   a_jsendbuf = ad->j;
2185   b_jsendbuf = bd->j;
2186   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2187   cnt        = 0;
2188   for (i=0; i<n; i++) {
2189 
2190     /* put in lower diagonal portion */
2191     m = bd->i[i+1] - bd->i[i];
2192     while (m > 0) {
2193       /* is it above diagonal (in bd (compressed) numbering) */
2194       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2195       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2196       m--;
2197     }
2198 
2199     /* put in diagonal portion */
2200     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2201       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2202     }
2203 
2204     /* put in upper diagonal portion */
2205     while (m-- > 0) {
2206       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2207     }
2208   }
2209   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2210 
2211   /*--------------------------------------------------------------------
2212     Gather all column indices to all processors
2213   */
2214   for (i=0; i<size; i++) {
2215     recvcounts[i] = 0;
2216     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2217       recvcounts[i] += lens[j];
2218     }
2219   }
2220   displs[0] = 0;
2221   for (i=1; i<size; i++) {
2222     displs[i] = displs[i-1] + recvcounts[i-1];
2223   }
2224 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2225   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2226 #else
2227   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2228 #endif
2229   /*--------------------------------------------------------------------
2230     Assemble the matrix into useable form (note numerical values not yet set)
2231   */
2232   /* set the b->ilen (length of each row) values */
2233   ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
2234   /* set the b->i indices */
2235   b->i[0] = 0;
2236   for (i=1; i<=A->rmap->N/bs; i++) {
2237     b->i[i] = b->i[i-1] + lens[i-1];
2238   }
2239   ierr = PetscFree(lens);CHKERRQ(ierr);
2240   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2241   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2242   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2243 
2244   if (A->symmetric) {
2245     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2246   } else if (A->hermitian) {
2247     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2248   } else if (A->structurally_symmetric) {
2249     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2250   }
2251   *newmat = B;
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2256 {
2257   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2258   PetscErrorCode ierr;
2259   Vec            bb1 = NULL;
2260 
2261   PetscFunctionBegin;
2262   if (flag == SOR_APPLY_UPPER) {
2263     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2264     PetscFunctionReturn(0);
2265   }
2266 
2267   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2268     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2269   }
2270 
2271   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2272     if (flag & SOR_ZERO_INITIAL_GUESS) {
2273       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2274       its--;
2275     }
2276 
2277     while (its--) {
2278       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2279       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2280 
2281       /* update rhs: bb1 = bb - B*x */
2282       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2283       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2284 
2285       /* local sweep */
2286       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2287     }
2288   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2289     if (flag & SOR_ZERO_INITIAL_GUESS) {
2290       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2291       its--;
2292     }
2293     while (its--) {
2294       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2295       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2296 
2297       /* update rhs: bb1 = bb - B*x */
2298       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2299       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2300 
2301       /* local sweep */
2302       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2303     }
2304   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2305     if (flag & SOR_ZERO_INITIAL_GUESS) {
2306       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2307       its--;
2308     }
2309     while (its--) {
2310       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2311       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2312 
2313       /* update rhs: bb1 = bb - B*x */
2314       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2315       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2316 
2317       /* local sweep */
2318       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2319     }
2320   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2321 
2322   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2323   PetscFunctionReturn(0);
2324 }
2325 
2326 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2327 {
2328   PetscErrorCode ierr;
2329   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2330   PetscInt       N,i,*garray = aij->garray;
2331   PetscInt       ib,jb,bs = A->rmap->bs;
2332   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2333   MatScalar      *a_val = a_aij->a;
2334   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2335   MatScalar      *b_val = b_aij->a;
2336   PetscReal      *work;
2337 
2338   PetscFunctionBegin;
2339   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2340   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2341   if (type == NORM_2) {
2342     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2343       for (jb=0; jb<bs; jb++) {
2344         for (ib=0; ib<bs; ib++) {
2345           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2346           a_val++;
2347         }
2348       }
2349     }
2350     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2351       for (jb=0; jb<bs; jb++) {
2352         for (ib=0; ib<bs; ib++) {
2353           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2354           b_val++;
2355         }
2356       }
2357     }
2358   } else if (type == NORM_1) {
2359     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2360       for (jb=0; jb<bs; jb++) {
2361         for (ib=0; ib<bs; ib++) {
2362           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2363           a_val++;
2364         }
2365       }
2366     }
2367     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2368       for (jb=0; jb<bs; jb++) {
2369        for (ib=0; ib<bs; ib++) {
2370           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2371           b_val++;
2372         }
2373       }
2374     }
2375   } else if (type == NORM_INFINITY) {
2376     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2377       for (jb=0; jb<bs; jb++) {
2378         for (ib=0; ib<bs; ib++) {
2379           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2380           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2381           a_val++;
2382         }
2383       }
2384     }
2385     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2386       for (jb=0; jb<bs; jb++) {
2387         for (ib=0; ib<bs; ib++) {
2388           int col = garray[b_aij->j[i]] * bs + jb;
2389           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2390           b_val++;
2391         }
2392       }
2393     }
2394   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2395   if (type == NORM_INFINITY) {
2396     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2397   } else {
2398     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2399   }
2400   ierr = PetscFree(work);CHKERRQ(ierr);
2401   if (type == NORM_2) {
2402     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2403   }
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2408 {
2409   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2410   PetscErrorCode ierr;
2411 
2412   PetscFunctionBegin;
2413   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2414   A->factorerrortype             = a->A->factorerrortype;
2415   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2416   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2421 {
2422   PetscErrorCode ierr;
2423   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2424   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2425 
2426   PetscFunctionBegin;
2427   if (!Y->preallocated) {
2428     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2429   } else if (!aij->nz) {
2430     PetscInt nonew = aij->nonew;
2431     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2432     aij->nonew = nonew;
2433   }
2434   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2439 {
2440   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2441   PetscErrorCode ierr;
2442 
2443   PetscFunctionBegin;
2444   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2445   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2446   if (d) {
2447     PetscInt rstart;
2448     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2449     *d += rstart/A->rmap->bs;
2450 
2451   }
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2456 {
2457   PetscFunctionBegin;
2458   *a = ((Mat_MPIBAIJ*)A->data)->A;
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 /* -------------------------------------------------------------------*/
2463 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2464                                        MatGetRow_MPIBAIJ,
2465                                        MatRestoreRow_MPIBAIJ,
2466                                        MatMult_MPIBAIJ,
2467                                 /* 4*/ MatMultAdd_MPIBAIJ,
2468                                        MatMultTranspose_MPIBAIJ,
2469                                        MatMultTransposeAdd_MPIBAIJ,
2470                                        NULL,
2471                                        NULL,
2472                                        NULL,
2473                                 /*10*/ NULL,
2474                                        NULL,
2475                                        NULL,
2476                                        MatSOR_MPIBAIJ,
2477                                        MatTranspose_MPIBAIJ,
2478                                 /*15*/ MatGetInfo_MPIBAIJ,
2479                                        MatEqual_MPIBAIJ,
2480                                        MatGetDiagonal_MPIBAIJ,
2481                                        MatDiagonalScale_MPIBAIJ,
2482                                        MatNorm_MPIBAIJ,
2483                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2484                                        MatAssemblyEnd_MPIBAIJ,
2485                                        MatSetOption_MPIBAIJ,
2486                                        MatZeroEntries_MPIBAIJ,
2487                                 /*24*/ MatZeroRows_MPIBAIJ,
2488                                        NULL,
2489                                        NULL,
2490                                        NULL,
2491                                        NULL,
2492                                 /*29*/ MatSetUp_MPIBAIJ,
2493                                        NULL,
2494                                        NULL,
2495                                        MatGetDiagonalBlock_MPIBAIJ,
2496                                        NULL,
2497                                 /*34*/ MatDuplicate_MPIBAIJ,
2498                                        NULL,
2499                                        NULL,
2500                                        NULL,
2501                                        NULL,
2502                                 /*39*/ MatAXPY_MPIBAIJ,
2503                                        MatCreateSubMatrices_MPIBAIJ,
2504                                        MatIncreaseOverlap_MPIBAIJ,
2505                                        MatGetValues_MPIBAIJ,
2506                                        MatCopy_MPIBAIJ,
2507                                 /*44*/ NULL,
2508                                        MatScale_MPIBAIJ,
2509                                        MatShift_MPIBAIJ,
2510                                        NULL,
2511                                        MatZeroRowsColumns_MPIBAIJ,
2512                                 /*49*/ NULL,
2513                                        NULL,
2514                                        NULL,
2515                                        NULL,
2516                                        NULL,
2517                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2518                                        NULL,
2519                                        MatSetUnfactored_MPIBAIJ,
2520                                        MatPermute_MPIBAIJ,
2521                                        MatSetValuesBlocked_MPIBAIJ,
2522                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2523                                        MatDestroy_MPIBAIJ,
2524                                        MatView_MPIBAIJ,
2525                                        NULL,
2526                                        NULL,
2527                                 /*64*/ NULL,
2528                                        NULL,
2529                                        NULL,
2530                                        NULL,
2531                                        NULL,
2532                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2533                                        NULL,
2534                                        NULL,
2535                                        NULL,
2536                                        NULL,
2537                                 /*74*/ NULL,
2538                                        MatFDColoringApply_BAIJ,
2539                                        NULL,
2540                                        NULL,
2541                                        NULL,
2542                                 /*79*/ NULL,
2543                                        NULL,
2544                                        NULL,
2545                                        NULL,
2546                                        MatLoad_MPIBAIJ,
2547                                 /*84*/ NULL,
2548                                        NULL,
2549                                        NULL,
2550                                        NULL,
2551                                        NULL,
2552                                 /*89*/ NULL,
2553                                        NULL,
2554                                        NULL,
2555                                        NULL,
2556                                        NULL,
2557                                 /*94*/ NULL,
2558                                        NULL,
2559                                        NULL,
2560                                        NULL,
2561                                        NULL,
2562                                 /*99*/ NULL,
2563                                        NULL,
2564                                        NULL,
2565                                        MatConjugate_MPIBAIJ,
2566                                        NULL,
2567                                 /*104*/NULL,
2568                                        MatRealPart_MPIBAIJ,
2569                                        MatImaginaryPart_MPIBAIJ,
2570                                        NULL,
2571                                        NULL,
2572                                 /*109*/NULL,
2573                                        NULL,
2574                                        NULL,
2575                                        NULL,
2576                                        MatMissingDiagonal_MPIBAIJ,
2577                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2578                                        NULL,
2579                                        MatGetGhosts_MPIBAIJ,
2580                                        NULL,
2581                                        NULL,
2582                                 /*119*/NULL,
2583                                        NULL,
2584                                        NULL,
2585                                        NULL,
2586                                        MatGetMultiProcBlock_MPIBAIJ,
2587                                 /*124*/NULL,
2588                                        MatGetColumnNorms_MPIBAIJ,
2589                                        MatInvertBlockDiagonal_MPIBAIJ,
2590                                        NULL,
2591                                        NULL,
2592                                /*129*/ NULL,
2593                                        NULL,
2594                                        NULL,
2595                                        NULL,
2596                                        NULL,
2597                                /*134*/ NULL,
2598                                        NULL,
2599                                        NULL,
2600                                        NULL,
2601                                        NULL,
2602                                /*139*/ MatSetBlockSizes_Default,
2603                                        NULL,
2604                                        NULL,
2605                                        MatFDColoringSetUp_MPIXAIJ,
2606                                        NULL,
2607                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2608 };
2609 
2610 
2611 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2612 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2613 
2614 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2615 {
2616   PetscInt       m,rstart,cstart,cend;
2617   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2618   const PetscInt *JJ    =NULL;
2619   PetscScalar    *values=NULL;
2620   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2621   PetscErrorCode ierr;
2622   PetscBool      nooffprocentries;
2623 
2624   PetscFunctionBegin;
2625   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2626   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2627   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2628   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2629   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2630   m      = B->rmap->n/bs;
2631   rstart = B->rmap->rstart/bs;
2632   cstart = B->cmap->rstart/bs;
2633   cend   = B->cmap->rend/bs;
2634 
2635   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2636   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2637   for (i=0; i<m; i++) {
2638     nz = ii[i+1] - ii[i];
2639     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2640     nz_max = PetscMax(nz_max,nz);
2641     dlen   = 0;
2642     olen   = 0;
2643     JJ     = jj + ii[i];
2644     for (j=0; j<nz; j++) {
2645       if (*JJ < cstart || *JJ >= cend) olen++;
2646       else dlen++;
2647       JJ++;
2648     }
2649     d_nnz[i] = dlen;
2650     o_nnz[i] = olen;
2651   }
2652   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2653   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2654 
2655   values = (PetscScalar*)V;
2656   if (!values) {
2657     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2658   }
2659   for (i=0; i<m; i++) {
2660     PetscInt          row    = i + rstart;
2661     PetscInt          ncols  = ii[i+1] - ii[i];
2662     const PetscInt    *icols = jj + ii[i];
2663     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2664       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2665       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2666     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2667       PetscInt j;
2668       for (j=0; j<ncols; j++) {
2669         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2670         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2671       }
2672     }
2673   }
2674 
2675   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2676   nooffprocentries    = B->nooffprocentries;
2677   B->nooffprocentries = PETSC_TRUE;
2678   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2679   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2680   B->nooffprocentries = nooffprocentries;
2681 
2682   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2683   PetscFunctionReturn(0);
2684 }
2685 
2686 /*@C
2687    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2688 
2689    Collective
2690 
2691    Input Parameters:
2692 +  B - the matrix
2693 .  bs - the block size
2694 .  i - the indices into j for the start of each local row (starts with zero)
2695 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2696 -  v - optional values in the matrix
2697 
2698    Level: advanced
2699 
2700    Notes:
2701     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2702    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2703    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2704    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2705    block column and the second index is over columns within a block.
2706 
2707    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2708 
2709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2710 @*/
2711 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2712 {
2713   PetscErrorCode ierr;
2714 
2715   PetscFunctionBegin;
2716   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2717   PetscValidType(B,1);
2718   PetscValidLogicalCollectiveInt(B,bs,2);
2719   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2724 {
2725   Mat_MPIBAIJ    *b;
2726   PetscErrorCode ierr;
2727   PetscInt       i;
2728   PetscMPIInt    size;
2729 
2730   PetscFunctionBegin;
2731   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2732   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2733   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2734   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2735 
2736   if (d_nnz) {
2737     for (i=0; i<B->rmap->n/bs; i++) {
2738       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2739     }
2740   }
2741   if (o_nnz) {
2742     for (i=0; i<B->rmap->n/bs; i++) {
2743       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2744     }
2745   }
2746 
2747   b      = (Mat_MPIBAIJ*)B->data;
2748   b->bs2 = bs*bs;
2749   b->mbs = B->rmap->n/bs;
2750   b->nbs = B->cmap->n/bs;
2751   b->Mbs = B->rmap->N/bs;
2752   b->Nbs = B->cmap->N/bs;
2753 
2754   for (i=0; i<=b->size; i++) {
2755     b->rangebs[i] = B->rmap->range[i]/bs;
2756   }
2757   b->rstartbs = B->rmap->rstart/bs;
2758   b->rendbs   = B->rmap->rend/bs;
2759   b->cstartbs = B->cmap->rstart/bs;
2760   b->cendbs   = B->cmap->rend/bs;
2761 
2762 #if defined(PETSC_USE_CTABLE)
2763   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2764 #else
2765   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2766 #endif
2767   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2768   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2769   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2770 
2771   /* Because the B will have been resized we simply destroy it and create a new one each time */
2772   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2773   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2774   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2775   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2776   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2777   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2778 
2779   if (!B->preallocated) {
2780     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2781     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2782     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2783     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2784     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2785   }
2786 
2787   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2788   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2789   B->preallocated  = PETSC_TRUE;
2790   B->was_assembled = PETSC_FALSE;
2791   B->assembled     = PETSC_FALSE;
2792   PetscFunctionReturn(0);
2793 }
2794 
2795 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2796 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2797 
2798 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2799 {
2800   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2801   PetscErrorCode ierr;
2802   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2803   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2804   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2805 
2806   PetscFunctionBegin;
2807   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
2808   ii[0] = 0;
2809   for (i=0; i<M; i++) {
2810     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2811     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2812     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2813     /* remove one from count of matrix has diagonal */
2814     for (j=id[i]; j<id[i+1]; j++) {
2815       if (jd[j] == i) {ii[i+1]--;break;}
2816     }
2817   }
2818   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2819   cnt  = 0;
2820   for (i=0; i<M; i++) {
2821     for (j=io[i]; j<io[i+1]; j++) {
2822       if (garray[jo[j]] > rstart) break;
2823       jj[cnt++] = garray[jo[j]];
2824     }
2825     for (k=id[i]; k<id[i+1]; k++) {
2826       if (jd[k] != i) {
2827         jj[cnt++] = rstart + jd[k];
2828       }
2829     }
2830     for (; j<io[i+1]; j++) {
2831       jj[cnt++] = garray[jo[j]];
2832     }
2833   }
2834   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2835   PetscFunctionReturn(0);
2836 }
2837 
2838 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2839 
2840 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2841 
2842 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2843 {
2844   PetscErrorCode ierr;
2845   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2846   Mat            B;
2847   Mat_MPIAIJ     *b;
2848 
2849   PetscFunctionBegin;
2850   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2851 
2852   if (reuse == MAT_REUSE_MATRIX) {
2853     B = *newmat;
2854   } else {
2855     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2856     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2857     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2858     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2859     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2860     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2861   }
2862   b = (Mat_MPIAIJ*) B->data;
2863 
2864   if (reuse == MAT_REUSE_MATRIX) {
2865     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
2866     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
2867   } else {
2868     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2869     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2870     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2871     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2872     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2873     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2874     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2875   }
2876   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2877   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2878 
2879   if (reuse == MAT_INPLACE_MATRIX) {
2880     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
2881   } else {
2882    *newmat = B;
2883   }
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 /*MC
2888    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2889 
2890    Options Database Keys:
2891 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2892 . -mat_block_size <bs> - set the blocksize used to store the matrix
2893 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2894 - -mat_use_hash_table <fact>
2895 
2896    Level: beginner
2897 
2898    Notes:
2899     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
2900     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2901 
2902 .seealso: MatCreateBAIJ
2903 M*/
2904 
2905 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2906 
2907 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2908 {
2909   Mat_MPIBAIJ    *b;
2910   PetscErrorCode ierr;
2911   PetscBool      flg = PETSC_FALSE;
2912 
2913   PetscFunctionBegin;
2914   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2915   B->data = (void*)b;
2916 
2917   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2918   B->assembled = PETSC_FALSE;
2919 
2920   B->insertmode = NOT_SET_VALUES;
2921   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
2922   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);
2923 
2924   /* build local table of row and column ownerships */
2925   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
2926 
2927   /* build cache for off array entries formed */
2928   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2929 
2930   b->donotstash  = PETSC_FALSE;
2931   b->colmap      = NULL;
2932   b->garray      = NULL;
2933   b->roworiented = PETSC_TRUE;
2934 
2935   /* stuff used in block assembly */
2936   b->barray = NULL;
2937 
2938   /* stuff used for matrix vector multiply */
2939   b->lvec  = NULL;
2940   b->Mvctx = NULL;
2941 
2942   /* stuff for MatGetRow() */
2943   b->rowindices   = NULL;
2944   b->rowvalues    = NULL;
2945   b->getrowactive = PETSC_FALSE;
2946 
2947   /* hash table stuff */
2948   b->ht           = NULL;
2949   b->hd           = NULL;
2950   b->ht_size      = 0;
2951   b->ht_flag      = PETSC_FALSE;
2952   b->ht_fact      = 0;
2953   b->ht_total_ct  = 0;
2954   b->ht_insert_ct = 0;
2955 
2956   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2957   b->ijonly = PETSC_FALSE;
2958 
2959 
2960   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2961   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
2962   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
2963 #if defined(PETSC_HAVE_HYPRE)
2964   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
2965 #endif
2966   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2967   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2968   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2969   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2970   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2971   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2972   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
2973   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2974 
2975   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2976   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
2977   if (flg) {
2978     PetscReal fact = 1.39;
2979     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2980     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2981     if (fact <= 1.0) fact = 1.39;
2982     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2983     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2984   }
2985   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2986   PetscFunctionReturn(0);
2987 }
2988 
2989 /*MC
2990    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2991 
2992    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2993    and MATMPIBAIJ otherwise.
2994 
2995    Options Database Keys:
2996 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2997 
2998   Level: beginner
2999 
3000 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3001 M*/
3002 
3003 /*@C
3004    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3005    (block compressed row).  For good matrix assembly performance
3006    the user should preallocate the matrix storage by setting the parameters
3007    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3008    performance can be increased by more than a factor of 50.
3009 
3010    Collective on Mat
3011 
3012    Input Parameters:
3013 +  B - the matrix
3014 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3015           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3016 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3017            submatrix  (same for all local rows)
3018 .  d_nnz - array containing the number of block nonzeros in the various block rows
3019            of the in diagonal portion of the local (possibly different for each block
3020            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3021            set it even if it is zero.
3022 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3023            submatrix (same for all local rows).
3024 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3025            off-diagonal portion of the local submatrix (possibly different for
3026            each block row) or NULL.
3027 
3028    If the *_nnz parameter is given then the *_nz parameter is ignored
3029 
3030    Options Database Keys:
3031 +   -mat_block_size - size of the blocks to use
3032 -   -mat_use_hash_table <fact>
3033 
3034    Notes:
3035    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3036    than it must be used on all processors that share the object for that argument.
3037 
3038    Storage Information:
3039    For a square global matrix we define each processor's diagonal portion
3040    to be its local rows and the corresponding columns (a square submatrix);
3041    each processor's off-diagonal portion encompasses the remainder of the
3042    local matrix (a rectangular submatrix).
3043 
3044    The user can specify preallocated storage for the diagonal part of
3045    the local submatrix with either d_nz or d_nnz (not both).  Set
3046    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3047    memory allocation.  Likewise, specify preallocated storage for the
3048    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3049 
3050    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3051    the figure below we depict these three local rows and all columns (0-11).
3052 
3053 .vb
3054            0 1 2 3 4 5 6 7 8 9 10 11
3055           --------------------------
3056    row 3  |o o o d d d o o o o  o  o
3057    row 4  |o o o d d d o o o o  o  o
3058    row 5  |o o o d d d o o o o  o  o
3059           --------------------------
3060 .ve
3061 
3062    Thus, any entries in the d locations are stored in the d (diagonal)
3063    submatrix, and any entries in the o locations are stored in the
3064    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3065    stored simply in the MATSEQBAIJ format for compressed row storage.
3066 
3067    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3068    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3069    In general, for PDE problems in which most nonzeros are near the diagonal,
3070    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3071    or you will get TERRIBLE performance; see the users' manual chapter on
3072    matrices.
3073 
3074    You can call MatGetInfo() to get information on how effective the preallocation was;
3075    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3076    You can also run with the option -info and look for messages with the string
3077    malloc in them to see if additional memory allocation was needed.
3078 
3079    Level: intermediate
3080 
3081 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3082 @*/
3083 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3084 {
3085   PetscErrorCode ierr;
3086 
3087   PetscFunctionBegin;
3088   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3089   PetscValidType(B,1);
3090   PetscValidLogicalCollectiveInt(B,bs,2);
3091   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 /*@C
3096    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3097    (block compressed row).  For good matrix assembly performance
3098    the user should preallocate the matrix storage by setting the parameters
3099    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3100    performance can be increased by more than a factor of 50.
3101 
3102    Collective
3103 
3104    Input Parameters:
3105 +  comm - MPI communicator
3106 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3107           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3108 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3109            This value should be the same as the local size used in creating the
3110            y vector for the matrix-vector product y = Ax.
3111 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3112            This value should be the same as the local size used in creating the
3113            x vector for the matrix-vector product y = Ax.
3114 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3115 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3116 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3117            submatrix  (same for all local rows)
3118 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3119            of the in diagonal portion of the local (possibly different for each block
3120            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3121            and set it even if it is zero.
3122 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3123            submatrix (same for all local rows).
3124 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3125            off-diagonal portion of the local submatrix (possibly different for
3126            each block row) or NULL.
3127 
3128    Output Parameter:
3129 .  A - the matrix
3130 
3131    Options Database Keys:
3132 +   -mat_block_size - size of the blocks to use
3133 -   -mat_use_hash_table <fact>
3134 
3135    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3136    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3137    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3138 
3139    Notes:
3140    If the *_nnz parameter is given then the *_nz parameter is ignored
3141 
3142    A nonzero block is any block that as 1 or more nonzeros in it
3143 
3144    The user MUST specify either the local or global matrix dimensions
3145    (possibly both).
3146 
3147    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3148    than it must be used on all processors that share the object for that argument.
3149 
3150    Storage Information:
3151    For a square global matrix we define each processor's diagonal portion
3152    to be its local rows and the corresponding columns (a square submatrix);
3153    each processor's off-diagonal portion encompasses the remainder of the
3154    local matrix (a rectangular submatrix).
3155 
3156    The user can specify preallocated storage for the diagonal part of
3157    the local submatrix with either d_nz or d_nnz (not both).  Set
3158    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3159    memory allocation.  Likewise, specify preallocated storage for the
3160    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3161 
3162    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3163    the figure below we depict these three local rows and all columns (0-11).
3164 
3165 .vb
3166            0 1 2 3 4 5 6 7 8 9 10 11
3167           --------------------------
3168    row 3  |o o o d d d o o o o  o  o
3169    row 4  |o o o d d d o o o o  o  o
3170    row 5  |o o o d d d o o o o  o  o
3171           --------------------------
3172 .ve
3173 
3174    Thus, any entries in the d locations are stored in the d (diagonal)
3175    submatrix, and any entries in the o locations are stored in the
3176    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3177    stored simply in the MATSEQBAIJ format for compressed row storage.
3178 
3179    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3180    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3181    In general, for PDE problems in which most nonzeros are near the diagonal,
3182    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3183    or you will get TERRIBLE performance; see the users' manual chapter on
3184    matrices.
3185 
3186    Level: intermediate
3187 
3188 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3189 @*/
3190 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3191 {
3192   PetscErrorCode ierr;
3193   PetscMPIInt    size;
3194 
3195   PetscFunctionBegin;
3196   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3197   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3198   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3199   if (size > 1) {
3200     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3201     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3202   } else {
3203     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3204     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3205   }
3206   PetscFunctionReturn(0);
3207 }
3208 
3209 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3210 {
3211   Mat            mat;
3212   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3213   PetscErrorCode ierr;
3214   PetscInt       len=0;
3215 
3216   PetscFunctionBegin;
3217   *newmat = NULL;
3218   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3219   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3220   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3221 
3222   mat->factortype   = matin->factortype;
3223   mat->preallocated = PETSC_TRUE;
3224   mat->assembled    = PETSC_TRUE;
3225   mat->insertmode   = NOT_SET_VALUES;
3226 
3227   a             = (Mat_MPIBAIJ*)mat->data;
3228   mat->rmap->bs = matin->rmap->bs;
3229   a->bs2        = oldmat->bs2;
3230   a->mbs        = oldmat->mbs;
3231   a->nbs        = oldmat->nbs;
3232   a->Mbs        = oldmat->Mbs;
3233   a->Nbs        = oldmat->Nbs;
3234 
3235   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3236   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3237 
3238   a->size         = oldmat->size;
3239   a->rank         = oldmat->rank;
3240   a->donotstash   = oldmat->donotstash;
3241   a->roworiented  = oldmat->roworiented;
3242   a->rowindices   = NULL;
3243   a->rowvalues    = NULL;
3244   a->getrowactive = PETSC_FALSE;
3245   a->barray       = NULL;
3246   a->rstartbs     = oldmat->rstartbs;
3247   a->rendbs       = oldmat->rendbs;
3248   a->cstartbs     = oldmat->cstartbs;
3249   a->cendbs       = oldmat->cendbs;
3250 
3251   /* hash table stuff */
3252   a->ht           = NULL;
3253   a->hd           = NULL;
3254   a->ht_size      = 0;
3255   a->ht_flag      = oldmat->ht_flag;
3256   a->ht_fact      = oldmat->ht_fact;
3257   a->ht_total_ct  = 0;
3258   a->ht_insert_ct = 0;
3259 
3260   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
3261   if (oldmat->colmap) {
3262 #if defined(PETSC_USE_CTABLE)
3263     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3264 #else
3265     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3266     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3267     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
3268 #endif
3269   } else a->colmap = NULL;
3270 
3271   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3272     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3273     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3274     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
3275   } else a->garray = NULL;
3276 
3277   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3278   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3279   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3280   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3281   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3282 
3283   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3284   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3285   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3286   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3287   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3288   *newmat = mat;
3289   PetscFunctionReturn(0);
3290 }
3291 
3292 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3293 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3294 {
3295   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3296   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3297   PetscScalar    *matvals;
3298   PetscErrorCode ierr;
3299 
3300   PetscFunctionBegin;
3301   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3302 
3303   /* read in matrix header */
3304   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3305   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3306   M  = header[1]; N = header[2]; nz = header[3];
3307   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3308   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3309   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3310 
3311   /* set block sizes from the viewer's .info file */
3312   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3313   /* set local sizes if not set already */
3314   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3315   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3316   /* set global sizes if not set already */
3317   if (mat->rmap->N < 0) mat->rmap->N = M;
3318   if (mat->cmap->N < 0) mat->cmap->N = N;
3319   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3320   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3321 
3322   /* check if the matrix sizes are correct */
3323   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3324   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3325   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3326   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3327   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);
3328   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);
3329   mbs = m/bs; nbs = n/bs;
3330 
3331   /* read in row lengths and build row indices */
3332   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3333   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3334   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3335   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3336   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3337 
3338   /* read in column indices and matrix values */
3339   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3340   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3341   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3342 
3343   { /* preallocate matrix storage */
3344     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3345     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3346     PetscBool  sbaij,done;
3347     PetscInt   *d_nnz,*o_nnz;
3348 
3349     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3350     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3351     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3352     ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
3353     for (i=0; i<mbs; i++) {
3354       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3355       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3356       for (k=0; k<bs; k++) {
3357         PetscInt row = bs*i + k;
3358         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3359           PetscInt col = colidxs[j];
3360           if (!sbaij || col >= row) {
3361             if (col >= cs && col < ce) {
3362               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3363             } else {
3364               ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
3365               if (done) o_nnz[i]++;
3366             }
3367           }
3368         }
3369       }
3370     }
3371     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3372     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3373     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3374     ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3375     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3376   }
3377 
3378   /* store matrix values */
3379   for (i=0; i<m; i++) {
3380     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3381     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3382   }
3383 
3384   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3385   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3386   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3387   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3388   PetscFunctionReturn(0);
3389 }
3390 
3391 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3392 {
3393   PetscErrorCode ierr;
3394   PetscBool      isbinary;
3395 
3396   PetscFunctionBegin;
3397   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3398   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3399   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3400   PetscFunctionReturn(0);
3401 }
3402 
3403 /*@
3404    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3405 
3406    Input Parameters:
3407 +  mat  - the matrix
3408 -  fact - factor
3409 
3410    Not Collective, each process can use a different factor
3411 
3412    Level: advanced
3413 
3414   Notes:
3415    This can also be set by the command line option: -mat_use_hash_table <fact>
3416 
3417 .seealso: MatSetOption()
3418 @*/
3419 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3420 {
3421   PetscErrorCode ierr;
3422 
3423   PetscFunctionBegin;
3424   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3425   PetscFunctionReturn(0);
3426 }
3427 
3428 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3429 {
3430   Mat_MPIBAIJ *baij;
3431 
3432   PetscFunctionBegin;
3433   baij          = (Mat_MPIBAIJ*)mat->data;
3434   baij->ht_fact = fact;
3435   PetscFunctionReturn(0);
3436 }
3437 
3438 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3439 {
3440   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3441   PetscBool      flg;
3442   PetscErrorCode ierr;
3443 
3444   PetscFunctionBegin;
3445   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3446   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3447   if (Ad)     *Ad     = a->A;
3448   if (Ao)     *Ao     = a->B;
3449   if (colmap) *colmap = a->garray;
3450   PetscFunctionReturn(0);
3451 }
3452 
3453 /*
3454     Special version for direct calls from Fortran (to eliminate two function call overheads
3455 */
3456 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3457 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3458 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3459 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3460 #endif
3461 
3462 /*@C
3463   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3464 
3465   Collective on Mat
3466 
3467   Input Parameters:
3468 + mat - the matrix
3469 . min - number of input rows
3470 . im - input rows
3471 . nin - number of input columns
3472 . in - input columns
3473 . v - numerical values input
3474 - addvin - INSERT_VALUES or ADD_VALUES
3475 
3476   Notes:
3477     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3478 
3479   Level: advanced
3480 
3481 .seealso:   MatSetValuesBlocked()
3482 @*/
3483 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3484 {
3485   /* convert input arguments to C version */
3486   Mat        mat  = *matin;
3487   PetscInt   m    = *min, n = *nin;
3488   InsertMode addv = *addvin;
3489 
3490   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3491   const MatScalar *value;
3492   MatScalar       *barray     = baij->barray;
3493   PetscBool       roworiented = baij->roworiented;
3494   PetscErrorCode  ierr;
3495   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3496   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3497   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3498 
3499   PetscFunctionBegin;
3500   /* tasks normally handled by MatSetValuesBlocked() */
3501   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3502   else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3503   if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3504   if (mat->assembled) {
3505     mat->was_assembled = PETSC_TRUE;
3506     mat->assembled     = PETSC_FALSE;
3507   }
3508   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3509 
3510 
3511   if (!barray) {
3512     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3513     baij->barray = barray;
3514   }
3515 
3516   if (roworiented) stepval = (n-1)*bs;
3517   else stepval = (m-1)*bs;
3518 
3519   for (i=0; i<m; i++) {
3520     if (im[i] < 0) continue;
3521     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3522     if (im[i] >= rstart && im[i] < rend) {
3523       row = im[i] - rstart;
3524       for (j=0; j<n; j++) {
3525         /* If NumCol = 1 then a copy is not required */
3526         if ((roworiented) && (n == 1)) {
3527           barray = (MatScalar*)v + i*bs2;
3528         } else if ((!roworiented) && (m == 1)) {
3529           barray = (MatScalar*)v + j*bs2;
3530         } else { /* Here a copy is required */
3531           if (roworiented) {
3532             value = v + i*(stepval+bs)*bs + j*bs;
3533           } else {
3534             value = v + j*(stepval+bs)*bs + i*bs;
3535           }
3536           for (ii=0; ii<bs; ii++,value+=stepval) {
3537             for (jj=0; jj<bs; jj++) {
3538               *barray++ = *value++;
3539             }
3540           }
3541           barray -=bs2;
3542         }
3543 
3544         if (in[j] >= cstart && in[j] < cend) {
3545           col  = in[j] - cstart;
3546           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3547         } else if (in[j] < 0) continue;
3548         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3549         else {
3550           if (mat->was_assembled) {
3551             if (!baij->colmap) {
3552               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3553             }
3554 
3555 #if defined(PETSC_USE_DEBUG)
3556 #if defined(PETSC_USE_CTABLE)
3557             { PetscInt data;
3558               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3559               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3560             }
3561 #else
3562             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3563 #endif
3564 #endif
3565 #if defined(PETSC_USE_CTABLE)
3566             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3567             col  = (col - 1)/bs;
3568 #else
3569             col = (baij->colmap[in[j]] - 1)/bs;
3570 #endif
3571             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3572               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3573               col  =  in[j];
3574             }
3575           } else col = in[j];
3576           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3577         }
3578       }
3579     } else {
3580       if (!baij->donotstash) {
3581         if (roworiented) {
3582           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3583         } else {
3584           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3585         }
3586       }
3587     }
3588   }
3589 
3590   /* task normally handled by MatSetValuesBlocked() */
3591   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3592   PetscFunctionReturn(0);
3593 }
3594 
3595 /*@
3596      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3597          CSR format the local rows.
3598 
3599    Collective
3600 
3601    Input Parameters:
3602 +  comm - MPI communicator
3603 .  bs - the block size, only a block size of 1 is supported
3604 .  m - number of local rows (Cannot be PETSC_DECIDE)
3605 .  n - This value should be the same as the local size used in creating the
3606        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3607        calculated if N is given) For square matrices n is almost always m.
3608 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3609 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3610 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3611 .   j - column indices
3612 -   a - matrix values
3613 
3614    Output Parameter:
3615 .   mat - the matrix
3616 
3617    Level: intermediate
3618 
3619    Notes:
3620        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3621      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3622      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3623 
3624      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3625      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3626      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3627      with column-major ordering within blocks.
3628 
3629        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3630 
3631 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3632           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3633 @*/
3634 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3635 {
3636   PetscErrorCode ierr;
3637 
3638   PetscFunctionBegin;
3639   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3640   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3641   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3642   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3643   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3644   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3645   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3646   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3647   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3648   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3649   PetscFunctionReturn(0);
3650 }
3651 
3652 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3653 {
3654   PetscErrorCode ierr;
3655   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3656   PetscInt       *indx;
3657   PetscScalar    *values;
3658 
3659   PetscFunctionBegin;
3660   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3661   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3662     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3663     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3664     PetscInt       *bindx,rmax=a->rmax,j;
3665     PetscMPIInt    rank,size;
3666 
3667     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3668     mbs = m/bs; Nbs = N/cbs;
3669     if (n == PETSC_DECIDE) {
3670       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3671     }
3672     nbs = n/cbs;
3673 
3674     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3675     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3676 
3677     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3678     ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr);
3679     if (rank == size-1) {
3680       /* Check sum(nbs) = Nbs */
3681       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3682     }
3683 
3684     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3685     for (i=0; i<mbs; i++) {
3686       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3687       nnz = nnz/bs;
3688       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3689       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3690       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3691     }
3692     ierr = PetscFree(bindx);CHKERRQ(ierr);
3693 
3694     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3695     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3696     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3697     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3698     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3699     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3700     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3701     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3702   }
3703 
3704   /* numeric phase */
3705   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3706   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3707 
3708   for (i=0; i<m; i++) {
3709     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3710     Ii   = i + rstart;
3711     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3712     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3713   }
3714   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3715   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3716   PetscFunctionReturn(0);
3717 }
3718