xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision dfd57a172ac9fa6c7b5fe6de6ab5df85cefc2996)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode    ierr;
15   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
16   PetscScalar       *va,*vv;
17   Vec               vB,vA;
18   const PetscScalar *vb;
19 
20   PetscFunctionBegin;
21   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
22   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
23 
24   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
25   if (idx) {
26     for (i=0; i<m; i++) {
27       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
28     }
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
32   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
33   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
34 
35   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
36   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
37   for (i=0; i<m; i++) {
38     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
39       vv[i] = vb[i];
40       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
41     } else {
42       vv[i] = va[i];
43       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
44         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
45     }
46   }
47   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
48   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
49   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
50   ierr = PetscFree(idxb);CHKERRQ(ierr);
51   ierr = VecDestroy(&vA);CHKERRQ(ierr);
52   ierr = VecDestroy(&vB);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
57 {
58   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
63   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79      Local utility routine that creates a mapping from the global column
80    number to the local number in the off-diagonal part of the local
81    storage of the matrix.  This is done in a non scalable way since the
82    length of colmap equals the global matrix length.
83 */
84 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
85 {
86   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
87   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
88   PetscErrorCode ierr;
89   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
90 
91   PetscFunctionBegin;
92 #if defined(PETSC_USE_CTABLE)
93   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
94   for (i=0; i<nbs; i++) {
95     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
96   }
97 #else
98   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
99   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
100   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
101 #endif
102   PetscFunctionReturn(0);
103 }
104 
/*
   MatSetValues_SeqBAIJ_A_Private - Inserts/adds one scalar (point) value into
   the diagonal block of an MPIBAIJ matrix.  (row,col) are process-local point
   indices; (orow,ocol) are the global indices used only in error messages.
   A binary search followed by a short linear scan locates the block column;
   if absent, a zeroed block is inserted (unless a->nonew forbids it).
   NOTE: relies on many locals declared by the caller (rp, ap, rmax, nrow,
   brow, bcol, ridx, cidx, low, high, t, _i, bap, N, ierr, bs, bs2, aa, ai,
   aj, aimax, ailen, a, A).
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
140 
/*
   MatSetValues_SeqBAIJ_B_Private - Same insert/add logic as the _A_ variant
   above, but operating on the off-diagonal block (b, bi, bj, ba, bimax,
   bilen).  (row,col) are local point indices into B; (orow,ocol) are the
   global indices used only in error messages.  Relies on the same
   caller-declared locals as the _A_ macro.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
176 
177 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
178 {
179   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
180   MatScalar      value;
181   PetscBool      roworiented = baij->roworiented;
182   PetscErrorCode ierr;
183   PetscInt       i,j,row,col;
184   PetscInt       rstart_orig=mat->rmap->rstart;
185   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
186   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
187 
188   /* Some Variables required in the macro */
189   Mat         A     = baij->A;
190   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
191   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
192   MatScalar   *aa   =a->a;
193 
194   Mat         B     = baij->B;
195   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
196   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
197   MatScalar   *ba   =b->a;
198 
199   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
200   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
201   MatScalar *ap,*bap;
202 
203   PetscFunctionBegin;
204   for (i=0; i<m; i++) {
205     if (im[i] < 0) continue;
206     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
207     if (im[i] >= rstart_orig && im[i] < rend_orig) {
208       row = im[i] - rstart_orig;
209       for (j=0; j<n; j++) {
210         if (in[j] >= cstart_orig && in[j] < cend_orig) {
211           col = in[j] - cstart_orig;
212           if (roworiented) value = v[i*n+j];
213           else             value = v[i+j*m];
214           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
215         } else if (in[j] < 0) continue;
216         else if (PetscUnlikely(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
217         else {
218           if (mat->was_assembled) {
219             if (!baij->colmap) {
220               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
221             }
222 #if defined(PETSC_USE_CTABLE)
223             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
224             col  = col - 1;
225 #else
226             col = baij->colmap[in[j]/bs] - 1;
227 #endif
228             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
229               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
230               col  =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
232               B    = baij->B;
233               b    = (Mat_SeqBAIJ*)(B)->data;
234               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
235               ba   =b->a;
236             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
237             else col += in[j]%bs;
238           } else col = in[j];
239           if (roworiented) value = v[i*n+j];
240           else             value = v[i+j*m];
241           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
242           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
243         }
244       }
245     } else {
246       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
247       if (!baij->donotstash) {
248         mat->assembled = PETSC_FALSE;
249         if (roworiented) {
250           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
251         } else {
252           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
253         }
254       }
255     }
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
261 {
262   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
263   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
264   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
265   PetscErrorCode    ierr;
266   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
267   PetscBool         roworiented=a->roworiented;
268   const PetscScalar *value     = v;
269   MatScalar         *ap,*aa = a->a,*bap;
270 
271   PetscFunctionBegin;
272   rp   = aj + ai[row];
273   ap   = aa + bs2*ai[row];
274   rmax = imax[row];
275   nrow = ailen[row];
276   value = v;
277   low = 0;
278   high = nrow;
279   while (high-low > 7) {
280     t = (low+high)/2;
281     if (rp[t] > col) high = t;
282     else             low  = t;
283   }
284   for (i=low; i<high; i++) {
285     if (rp[i] > col) break;
286     if (rp[i] == col) {
287       bap = ap +  bs2*i;
288       if (roworiented) {
289         if (is == ADD_VALUES) {
290           for (ii=0; ii<bs; ii++) {
291             for (jj=ii; jj<bs2; jj+=bs) {
292               bap[jj] += *value++;
293             }
294           }
295         } else {
296           for (ii=0; ii<bs; ii++) {
297             for (jj=ii; jj<bs2; jj+=bs) {
298               bap[jj] = *value++;
299             }
300           }
301         }
302       } else {
303         if (is == ADD_VALUES) {
304           for (ii=0; ii<bs; ii++,value+=bs) {
305             for (jj=0; jj<bs; jj++) {
306               bap[jj] += value[jj];
307             }
308             bap += bs;
309           }
310         } else {
311           for (ii=0; ii<bs; ii++,value+=bs) {
312             for (jj=0; jj<bs; jj++) {
313               bap[jj]  = value[jj];
314             }
315             bap += bs;
316           }
317         }
318       }
319       goto noinsert2;
320     }
321   }
322   if (nonew == 1) goto noinsert2;
323   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
324   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
325   N = nrow++ - 1; high++;
326   /* shift up all the later entries in this row */
327   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
328   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
329   rp[i] = col;
330   bap   = ap +  bs2*i;
331   if (roworiented) {
332     for (ii=0; ii<bs; ii++) {
333       for (jj=ii; jj<bs2; jj+=bs) {
334         bap[jj] = *value++;
335       }
336     }
337   } else {
338     for (ii=0; ii<bs; ii++) {
339       for (jj=0; jj<bs; jj++) {
340         *bap++ = *value++;
341       }
342     }
343   }
344   noinsert2:;
345   ailen[row] = nrow;
346   PetscFunctionReturn(0);
347 }
348 
349 /*
350     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
351     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
352 */
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  /* Inserts/adds m x n dense bs x bs blocks at block indices (im[],in[]).
     Local block rows are routed to the diagonal (A) or off-diagonal (B)
     sequential block; off-process rows are stashed for assembly. */
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray; /* persistent bs2 scratch block */
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocate the scratch block used when a copy is unavoidable */
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/columns of one block in v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently ignored */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          /* v is already a contiguous bs2 block; alias it (const cast is safe,
             the callee only reads through barray) */
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          /* gather the strided block into the contiguous scratch buffer */
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
        else {
          /* off-diagonal block column: translate global -> local via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs; /* colmap stores point index + 1; convert to block */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new off-diagonal block column: revert to disassembled form */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
445 
/* Multiplicative (golden-ratio) hash used by the *_HT MatSetValues paths:
   maps key into [0,size) via the fractional part of key*HASH_KEY */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  /* Hash-table variant of MatSetValues: each (block row, block col) entry was
     pre-registered in baij->ht by MatCreateHashTable_MPIBAIJ_Private(), and
     HD[] holds the corresponding pointer into the block's value storage.
     Only pre-existing nonzero blocks can be written; lookup failure is an
     error.  Off-process rows are stashed as usual. */
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; /* probe statistics (debug only) */

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table; keys are 1-based so 0 means empty */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
        /* linear probing: scan forward from h1, wrap once; full cycle back
           to h1 means the entry does not exist */
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      /* off-process row: stash for assembly-time communication */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
515 
516 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
517 {
518   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
519   PetscBool         roworiented = baij->roworiented;
520   PetscErrorCode    ierr;
521   PetscInt          i,j,ii,jj,row,col;
522   PetscInt          rstart=baij->rstartbs;
523   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
524   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
525   PetscReal         tmp;
526   MatScalar         **HD = baij->hd,*baij_a;
527   const PetscScalar *v_t,*value;
528   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
529 
530   PetscFunctionBegin;
531   if (roworiented) stepval = (n-1)*bs;
532   else stepval = (m-1)*bs;
533 
534   for (i=0; i<m; i++) {
535     if (PetscDefined(USE_DEBUG)) {
536       if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
537       if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
538     }
539     row = im[i];
540     v_t = v + i*nbs2;
541     if (row >= rstart && row < rend) {
542       for (j=0; j<n; j++) {
543         col = in[j];
544 
545         /* Look up into the Hash Table */
546         key = row*Nbs+col+1;
547         h1  = HASH(size,key,tmp);
548 
549         idx = h1;
550         if (PetscDefined(USE_DEBUG)) {
551           total_ct++;
552           insert_ct++;
553           if (HT[idx] != key) {
554             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
555             if (idx == size) {
556               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
557               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
558             }
559           }
560         } else if (HT[idx] != key) {
561           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
562           if (idx == size) {
563             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
564             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
565           }
566         }
567         baij_a = HD[idx];
568         if (roworiented) {
569           /*value = v + i*(stepval+bs)*bs + j*bs;*/
570           /* value = v + (i*(stepval+bs)+j)*bs; */
571           value = v_t;
572           v_t  += bs;
573           if (addv == ADD_VALUES) {
574             for (ii=0; ii<bs; ii++,value+=stepval) {
575               for (jj=ii; jj<bs2; jj+=bs) {
576                 baij_a[jj] += *value++;
577               }
578             }
579           } else {
580             for (ii=0; ii<bs; ii++,value+=stepval) {
581               for (jj=ii; jj<bs2; jj+=bs) {
582                 baij_a[jj] = *value++;
583               }
584             }
585           }
586         } else {
587           value = v + j*(stepval+bs)*bs + i*bs;
588           if (addv == ADD_VALUES) {
589             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
590               for (jj=0; jj<bs; jj++) {
591                 baij_a[jj] += *value++;
592               }
593             }
594           } else {
595             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
596               for (jj=0; jj<bs; jj++) {
597                 baij_a[jj] = *value++;
598               }
599             }
600           }
601         }
602       }
603     } else {
604       if (!baij->donotstash) {
605         if (roworiented) {
606           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
607         } else {
608           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
609         }
610       }
611     }
612   }
613   if (PetscDefined(USE_DEBUG)) {
614     baij->ht_total_ct  += total_ct;
615     baij->ht_insert_ct += insert_ct;
616   }
617   PetscFunctionReturn(0);
618 }
619 
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  /* Retrieves an m x n dense block of values at global point indices
     (idxm[],idxn[]).  Only locally owned rows are supported; columns in the
     off-diagonal block are translated through colmap, and absent entries
     read as 0.0. */
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column lies in the diagonal block */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column lies in the off-diagonal block: translate via colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--; /* colmap values are stored shifted by +1 (0 = absent) */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* absent column, or stale colmap entry: the value is zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
660 
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  /* Computes the Frobenius norm, 1-norm (max column sum) or infinity norm
     (max row sum) of an MPIBAIJ matrix, reducing partial results over all
     processes.  On a single process, delegates to the sequential block. */
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, then global sum and sqrt */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart is taken from baij->rstartbs (ROW block start)
         but is used as the diagonal block's COLUMN offset below; this is only
         correct when the row and column layouts coincide — verify for
         rectangular layouts (baij->cstartbs would seem the intended value). */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
      /* accumulate |a_ij| into the global column sums, diagonal block first */
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      /* off-diagonal block: columns map through garray */
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      /* global column sums, then take the maximum */
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      /* one pass per block row; sums[] accumulates the bs point-row sums */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      /* global maximum over all processes */
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
753 
754 /*
755   Creates the hash table, and sets the table
756   This table is created only once.
757   If new entried need to be added to the matrix
758   then the hash table has to be destroyed and
759   recreated.
760 */
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0; /* ct: total collisions seen, max: longest probe sequence */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0); /* table already exists; it is built only once */

  /* table gets factor times as many slots as there are nonzero blocks */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table; PetscCalloc2 zeroes it, so HT[slot] == 0 marks an empty slot */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd; /* HD[slot]: pointer to the bs2 scalar values of the block */
  HT   = baij->ht; /* HT[slot]: key of the block stored there (0 = empty)     */

  /* NOTE(review): if the probe loop scans all ht_size slots without finding an
     empty one, the entry is silently dropped -- there is no error on a full table */

  /* Loop Over A: hash each (global block row, global block col) pair; the +1 in
     the key keeps every stored key nonzero, collisions resolved by linear probing */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2; /* start of this block's values */
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: same scheme; garray maps local off-diagonal block columns to global ones */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary: j counts occupied slots, (ct+j)/j is the average probe length */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
845 
846 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
847 {
848   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
849   PetscErrorCode ierr;
850   PetscInt       nstash,reallocs;
851 
852   PetscFunctionBegin;
853   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
854 
855   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
856   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
857   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
858   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
859   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
860   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
/*
  Completes assembly of an MPIBAIJ matrix: receives and inserts the stashed
  off-process values (point stash then block stash), assembles the local
  diagonal (A) and off-diagonal (B) blocks, coordinates disassembly across
  ranks, and, when the hash-table option is active, builds the table and
  installs the hash-table MatSetValues routines.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each message is a list of (row,col,val) entries
       in which entries belonging to one row are consecutive */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 scalars, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is true only if every rank was assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  /* report and reset the hash-table search counters accumulated by MatSetValues */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* first final assembly with MAT_USE_HASH_TABLE set: build the table and
     switch the insertion routines to the hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow() work space; it is reallocated on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
977 
978 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
979 #include <petscdraw.h>
/*
  Views an MPIBAIJ matrix on an ASCII, draw, or socket viewer. The INFO
  formats print per-rank statistics only; all other formats gather the entire
  matrix onto rank 0 and view it there (memory-intensive for large matrices).
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank sizes/nonzeros for the whole local part and for each block */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* a null draw context means graphics are disabled; nothing to do */
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns all rows of the gathered copy; other ranks own none */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part: rvals holds the bs global point rows of block row i */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block per call */
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part; garray maps local block columns to global ones */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* on rank 0 the gathered matrix's diagonal block is the full matrix */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1097 
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
  Writes the matrix to a binary viewer in the standard PETSc binary format:
  header (classid, M, N, global nz), row lengths, column indices, then values.
  Within each point row, columns are emitted in globally ascending order by
  interleaving the off-diagonal block B (left of the diagonal block), the
  diagonal block A, and the remainder of B.
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;  /* local -> global block column map for B */
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz); /* number of locally stored scalar entries */

  /* write matrix header; rank 0 receives the global nz via the reduction */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths: all bs point rows of a block row have the same length */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices: for each point row, first the B columns
     left of the diagonal block, then the A columns, then the remaining B columns */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break; /* cs/bs is the first diagonal block column */
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs;
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values in the same order as the column indices;
     blocks are stored column-major, so scalar (k,l) of block j is at bs*(bs*j+l)+k */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1180 
1181 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1182 {
1183   PetscErrorCode ierr;
1184   PetscBool      iascii,isdraw,issocket,isbinary;
1185 
1186   PetscFunctionBegin;
1187   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1188   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1189   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1190   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1191   if (iascii || isdraw || issocket) {
1192     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1193   } else if (isbinary) {
1194     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1195   }
1196   PetscFunctionReturn(0);
1197 }
1198 
/*
  Destroys an MPIBAIJ matrix: frees the stashes, the two sequential blocks,
  the column map, the communication objects, all work space, and removes the
  function compositions registered at creation time.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* assembly-time communication stashes */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* sequential diagonal (A) and off-diagonal (B) blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* global-to-local column map; stored as a table or an array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* ghost vector and scatter used by the matrix-vector products */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  /* MatGetRow() work space */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  /* hash table storage (MAT_USE_HASH_TABLE option) */
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  /* remove the composed methods; the strings must match those used at registration */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1241 
1242 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1243 {
1244   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1245   PetscErrorCode ierr;
1246   PetscInt       nt;
1247 
1248   PetscFunctionBegin;
1249   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1250   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1251   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1252   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1253   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1255   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1261 {
1262   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1268   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1274 {
1275   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279   /* do nondiagonal part */
1280   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1281   /* do local part */
1282   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1283   /* add partial results together */
1284   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1285   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1290 {
1291   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1292   PetscErrorCode ierr;
1293 
1294   PetscFunctionBegin;
1295   /* do nondiagonal part */
1296   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1297   /* do local part */
1298   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1299   /* add partial results together */
1300   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1301   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 /*
1306   This only works correctly for square matrices where the subblock A->A is the
1307    diagonal block
1308 */
1309 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1310 {
1311   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1316   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1321 {
1322   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1327   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
/*
  Returns one locally owned (global) point row of the matrix, with column
  indices in globally ascending order. The row is the merge of three sorted
  segments: the part of B left of the diagonal block, all of A, then the rest
  of B. Results point into internal work space; call MatRestoreRow afterwards.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* tmp = number of nonzero blocks in block row i across A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart; /* local point-row index */

  /* pass NULL for the pieces the caller did not request (values still needed
     by the index path below when v is given, hence the asymmetric pcB rule) */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray; /* local -> global block column map for B */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = first B entry whose global block column is right of the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A, then the remaining B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested: locate the split point now */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        /* global point columns: A columns are offset by the diagonal block start */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1407 
1408 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1409 {
1410   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1411 
1412   PetscFunctionBegin;
1413   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1414   baij->getrowactive = PETSC_FALSE;
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1419 {
1420   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1421   PetscErrorCode ierr;
1422 
1423   PetscFunctionBegin;
1424   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1425   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1430 {
1431   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1432   Mat            A  = a->A,B = a->B;
1433   PetscErrorCode ierr;
1434   PetscLogDouble isend[5],irecv[5];
1435 
1436   PetscFunctionBegin;
1437   info->block_size = (PetscReal)matin->rmap->bs;
1438 
1439   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1440 
1441   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1442   isend[3] = info->memory;  isend[4] = info->mallocs;
1443 
1444   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1445 
1446   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1447   isend[3] += info->memory;  isend[4] += info->mallocs;
1448 
1449   if (flag == MAT_LOCAL) {
1450     info->nz_used      = isend[0];
1451     info->nz_allocated = isend[1];
1452     info->nz_unneeded  = isend[2];
1453     info->memory       = isend[3];
1454     info->mallocs      = isend[4];
1455   } else if (flag == MAT_GLOBAL_MAX) {
1456     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1457 
1458     info->nz_used      = irecv[0];
1459     info->nz_allocated = irecv[1];
1460     info->nz_unneeded  = irecv[2];
1461     info->memory       = irecv[3];
1462     info->mallocs      = irecv[4];
1463   } else if (flag == MAT_GLOBAL_SUM) {
1464     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1465 
1466     info->nz_used      = irecv[0];
1467     info->nz_allocated = irecv[1];
1468     info->nz_unneeded  = irecv[2];
1469     info->memory       = irecv[3];
1470     info->mallocs      = irecv[4];
1471   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1472   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1473   info->fill_ratio_needed = 0;
1474   info->factor_mallocs    = 0;
1475   PetscFunctionReturn(0);
1476 }
1477 
/*
  Sets an option on an MPIBAIJ matrix. Most options are forwarded to the two
  sequential blocks; a few are recorded only in the parallel data structure.
  The stacked case labels below are deliberate shared handlers, not fallthrough bugs.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-pattern options apply to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* recorded here as well because MatSetValues_MPIBAIJ consults a->roworiented */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process entries are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* 1.39 is the default table-size factor used by MatCreateHashTable_MPIBAIJ_Private */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  /* symmetry-type options are recorded only on the diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1525 
/*
  MatTranspose_MPIBAIJ - Forms the transpose of a distributed blocked (MPIBAIJ) matrix.

  reuse == MAT_INITIAL_MATRIX : creates a new matrix returned in *matout
  reuse == MAT_REUSE_MATRIX   : overwrites the existing *matout
  reuse == MAT_INPLACE_MATRIX : replaces A itself with its transpose (via MatHeaderMerge)

  Values are moved with MatSetValues_MPIBAIJ() one block-column slice at a time,
  so no preallocation information is computed for the result (see comment below).
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the result with A's row/column layouts swapped */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part (local diagonal block) */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  /* rvals[] holds the bs global point-row indices of block row i of A;
     they become the column indices in B */
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* global point column of the first column in this bs*bs block */
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* each iteration inserts bs consecutively stored block values as one row of B */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part (off-diagonal block); garray maps compressed local
     block columns to global block columns */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* MAT_INPLACE_MATRIX: move B's contents into A and dispose of B's header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1589 
1590 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1591 {
1592   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1593   Mat            a     = baij->A,b = baij->B;
1594   PetscErrorCode ierr;
1595   PetscInt       s1,s2,s3;
1596 
1597   PetscFunctionBegin;
1598   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1599   if (rr) {
1600     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1601     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1602     /* Overlap communication with computation. */
1603     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604   }
1605   if (ll) {
1606     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1607     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1608     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1609   }
1610   /* scale  the diagonal block */
1611   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1612 
1613   if (rr) {
1614     /* Do a scatter end and then right scale the off-diagonal block */
1615     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1616     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1617   }
1618   PetscFunctionReturn(0);
1619 }
1620 
/*
  MatZeroRows_MPIBAIJ - Zeros the listed global rows, optionally putting 'diag' on the
  diagonal and adjusting the right-hand side b so that the solution keeps the values of x
  in the zeroed rows (b[row] = diag * x[row]).

  rows[] may contain rows owned by any process; MatZeroRowsMapLocal_Private() maps them
  to the locally owned subset lrows[] (local indices, length len).
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    /* square/congruent layout: the sequential kernel can place diag directly */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert each diagonal entry via
       MatSetValues(), which requires that new nonzero locations be allowed */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* re-assemble since MatSetValues() may have stashed off-process entries */
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
1679 
/*
  MatZeroRowsColumns_MPIBAIJ - Zeros the listed global rows AND the matching columns,
  optionally placing 'diag' on the diagonal and updating b so that the solution retains
  the values of x in the zeroed rows.

  The diagonal block is handled by MatZeroRowsColumns() on l->A; the off-diagonal block
  l->B is cleaned up manually below using a scattered 0/1 mask of the zeroed columns.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers: lrows[0..len) are the owned local rows to zero */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix: build a 0/1 mask over the ghost columns
     marking which global columns are being zeroed */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost values of x are needed to correct b for the removed columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix: walk one point row (row%bs) through
     every bs*bs block of block row row/bs */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  /* NOTE(review): l->B is sequential so l->B->rmap->N equals the local row count — confirm */
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
1775 
1776 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1777 {
1778   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1779   PetscErrorCode ierr;
1780 
1781   PetscFunctionBegin;
1782   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1787 
1788 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1789 {
1790   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1791   Mat            a,b,c,d;
1792   PetscBool      flg;
1793   PetscErrorCode ierr;
1794 
1795   PetscFunctionBegin;
1796   a = matA->A; b = matA->B;
1797   c = matB->A; d = matB->B;
1798 
1799   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1800   if (flg) {
1801     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1802   }
1803   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1808 {
1809   PetscErrorCode ierr;
1810   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1811   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1812 
1813   PetscFunctionBegin;
1814   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1815   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1816     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1817   } else {
1818     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1819     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1820   }
1821   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
/* MatSetUp_MPIBAIJ - Default setup: preallocate with PETSC_DEFAULT row lengths,
   keeping whatever block size has already been set on A. */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1833 
1834 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1835 {
1836   PetscErrorCode ierr;
1837   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1838   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1839   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1840 
1841   PetscFunctionBegin;
1842   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
/*
  MatAXPY_MPIBAIJ - Computes Y = a*X + Y.

  SAME_NONZERO_PATTERN   : the stored values are added directly with BLAS axpy
  SUBSET_NONZERO_PATTERN : deferred to the generic MatAXPY_Basic()
  otherwise              : a new matrix with the union pattern is preallocated,
                           filled, and swapped into Y via MatHeaderReplace()
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: the value arrays line up entry for entry, so a raw
       axpy over nz*bs2 scalars suffices — first the diagonal blocks ... */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    /* ... then the off-diagonal blocks */
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    /* unknown relation between patterns: build a new matrix preallocated for the
       union of the two patterns, fill it, and replace Y's contents with it */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1889 
1890 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1891 {
1892 #if defined(PETSC_USE_COMPLEX)
1893   PetscErrorCode ierr;
1894   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1895 
1896   PetscFunctionBegin;
1897   ierr = MatConjugate_SeqBAIJ(a->A);CHKERRQ(ierr);
1898   ierr = MatConjugate_SeqBAIJ(a->B);CHKERRQ(ierr);
1899 #else
1900   PetscFunctionBegin;
1901 #endif
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1906 {
1907   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1912   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1917 {
1918   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1919   PetscErrorCode ierr;
1920 
1921   PetscFunctionBegin;
1922   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1923   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1928 {
1929   PetscErrorCode ierr;
1930   IS             iscol_local;
1931   PetscInt       csize;
1932 
1933   PetscFunctionBegin;
1934   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1935   if (call == MAT_REUSE_MATRIX) {
1936     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1937     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1938   } else {
1939     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1940   }
1941   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1942   if (call == MAT_INITIAL_MATRIX) {
1943     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1944     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 /*
1950   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1951   in local and then by concatenating the local matrices the end result.
1952   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1953   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1954 */
1955 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1956 {
1957   PetscErrorCode ierr;
1958   PetscMPIInt    rank,size;
1959   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1960   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1961   Mat            M,Mreuse;
1962   MatScalar      *vwork,*aa;
1963   MPI_Comm       comm;
1964   IS             isrow_new, iscol_new;
1965   Mat_SeqBAIJ    *aij;
1966 
1967   PetscFunctionBegin;
1968   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
1969   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
1970   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1971   /* The compression and expansion should be avoided. Doesn't point
1972      out errors, might change the indices, hence buggey */
1973   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
1974   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
1975 
1976   if (call ==  MAT_REUSE_MATRIX) {
1977     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1978     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1979     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
1980   } else {
1981     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
1982   }
1983   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
1984   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
1985   /*
1986       m - number of local rows
1987       n - number of columns (same on all processors)
1988       rstart - first row in new global matrix generated
1989   */
1990   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1991   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1992   m    = m/bs;
1993   n    = n/bs;
1994 
1995   if (call == MAT_INITIAL_MATRIX) {
1996     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1997     ii  = aij->i;
1998     jj  = aij->j;
1999 
2000     /*
2001         Determine the number of non-zeros in the diagonal and off-diagonal
2002         portions of the matrix in order to do correct preallocation
2003     */
2004 
2005     /* first get start and end of "diagonal" columns */
2006     if (csize == PETSC_DECIDE) {
2007       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2008       if (mglobal == n*bs) { /* square matrix */
2009         nlocal = m;
2010       } else {
2011         nlocal = n/size + ((n % size) > rank);
2012       }
2013     } else {
2014       nlocal = csize/bs;
2015     }
2016     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
2017     rstart = rend - nlocal;
2018     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2019 
2020     /* next, compute all the lengths */
2021     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2022     for (i=0; i<m; i++) {
2023       jend = ii[i+1] - ii[i];
2024       olen = 0;
2025       dlen = 0;
2026       for (j=0; j<jend; j++) {
2027         if (*jj < rstart || *jj >= rend) olen++;
2028         else dlen++;
2029         jj++;
2030       }
2031       olens[i] = olen;
2032       dlens[i] = dlen;
2033     }
2034     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2035     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2036     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2037     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2038     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2039     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2040   } else {
2041     PetscInt ml,nl;
2042 
2043     M    = *newmat;
2044     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2045     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2046     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2047     /*
2048          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2049        rather than the slower MatSetValues().
2050     */
2051     M->was_assembled = PETSC_TRUE;
2052     M->assembled     = PETSC_FALSE;
2053   }
2054   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2055   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2056   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2057   ii   = aij->i;
2058   jj   = aij->j;
2059   aa   = aij->a;
2060   for (i=0; i<m; i++) {
2061     row   = rstart/bs + i;
2062     nz    = ii[i+1] - ii[i];
2063     cwork = jj;     jj += nz;
2064     vwork = aa;     aa += nz*bs*bs;
2065     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2066   }
2067 
2068   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   *newmat = M;
2071 
2072   /* save submatrix used in processor for next request */
2073   if (call ==  MAT_INITIAL_MATRIX) {
2074     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2075     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2076   }
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2081 {
2082   MPI_Comm       comm,pcomm;
2083   PetscInt       clocal_size,nrows;
2084   const PetscInt *rows;
2085   PetscMPIInt    size;
2086   IS             crowp,lcolp;
2087   PetscErrorCode ierr;
2088 
2089   PetscFunctionBegin;
2090   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2091   /* make a collective version of 'rowp' */
2092   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2093   if (pcomm==comm) {
2094     crowp = rowp;
2095   } else {
2096     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2097     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2098     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2099     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2100   }
2101   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2102   /* make a local version of 'colp' */
2103   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2104   ierr = MPI_Comm_size(pcomm,&size);CHKERRMPI(ierr);
2105   if (size==1) {
2106     lcolp = colp;
2107   } else {
2108     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2109   }
2110   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2111   /* now we just get the submatrix */
2112   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2113   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2114   /* clean up */
2115   if (pcomm!=comm) {
2116     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2117   }
2118   if (size>1) {
2119     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2125 {
2126   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2127   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2128 
2129   PetscFunctionBegin;
2130   if (nghosts) *nghosts = B->nbs;
2131   if (ghosts) *ghosts = baij->garray;
2132   PetscFunctionReturn(0);
2133 }
2134 
/*
  MatGetSeqNonzeroStructure_MPIBAIJ - Builds, redundantly on EVERY process, a sequential
  AIJ matrix holding the complete block nonzero structure of the parallel matrix
  (one row/column per block; numerical values are not set).
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[] is indexed by GLOBAL block row; each process fills its own range
     (diag + off-diag counts), then the ranges are allgathered */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  /* one allocation shared by recvcounts and displs (freed together below) */
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  /* write this process's indices directly into B's j array at the right offset,
     merging diag and off-diag indices into globally sorted order per row */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  /* recompute counts/displs in units of nonzeros (they were block rows above) */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information from the parallel matrix */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2253 
/*
  MatSOR_MPIBAIJ - SOR/relaxation for MPIBAIJ matrices.

  Only the "local" variants are supported: each outer iteration scatters the
  current solution into the ghost vector, forms bb1 = bb - B*x (removing the
  off-process coupling from the right-hand side), and then applies the
  sequential SOR kernel of the local diagonal block A.  A request for a truly
  parallel (globally coupled) sweep is rejected with an error below.

  Input Parameters:
    matin  - the matrix
    bb     - right-hand side
    omega  - relaxation factor
    flag   - MatSORType bitmask selecting the sweep
    fshift - diagonal shift
    its    - number of outer (parallel) iterations
    lits   - number of local iterations per outer iteration
    xx     - solution vector (used as initial guess unless
             SOR_ZERO_INITIAL_GUESS is set); also the output
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = NULL;  /* work vector holding bb - B*x */

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application touches only the local diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever some sweep starts from a nonzero guess: more than
     one outer iteration, or the zero-initial-guess bit is not set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* x is zero, so B*x contributes nothing: sweep directly on bb */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather the off-process entries of xx into mat->lvec */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* x is zero, so B*x contributes nothing: sweep directly on bb */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* x is zero, so B*x contributes nothing: sweep directly on bb */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2324 
2325 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2326 {
2327   PetscErrorCode ierr;
2328   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2329   PetscInt       N,i,*garray = aij->garray;
2330   PetscInt       ib,jb,bs = A->rmap->bs;
2331   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2332   MatScalar      *a_val = a_aij->a;
2333   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2334   MatScalar      *b_val = b_aij->a;
2335   PetscReal      *work;
2336 
2337   PetscFunctionBegin;
2338   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2339   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2340   if (type == NORM_2) {
2341     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2342       for (jb=0; jb<bs; jb++) {
2343         for (ib=0; ib<bs; ib++) {
2344           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2345           a_val++;
2346         }
2347       }
2348     }
2349     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2350       for (jb=0; jb<bs; jb++) {
2351         for (ib=0; ib<bs; ib++) {
2352           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2353           b_val++;
2354         }
2355       }
2356     }
2357   } else if (type == NORM_1) {
2358     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2359       for (jb=0; jb<bs; jb++) {
2360         for (ib=0; ib<bs; ib++) {
2361           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2362           a_val++;
2363         }
2364       }
2365     }
2366     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2367       for (jb=0; jb<bs; jb++) {
2368        for (ib=0; ib<bs; ib++) {
2369           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2370           b_val++;
2371         }
2372       }
2373     }
2374   } else if (type == NORM_INFINITY) {
2375     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2376       for (jb=0; jb<bs; jb++) {
2377         for (ib=0; ib<bs; ib++) {
2378           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2379           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2380           a_val++;
2381         }
2382       }
2383     }
2384     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2385       for (jb=0; jb<bs; jb++) {
2386         for (ib=0; ib<bs; ib++) {
2387           int col = garray[b_aij->j[i]] * bs + jb;
2388           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2389           b_val++;
2390         }
2391       }
2392     }
2393   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2394   if (type == NORM_INFINITY) {
2395     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2396   } else {
2397     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2398   }
2399   ierr = PetscFree(work);CHKERRQ(ierr);
2400   if (type == NORM_2) {
2401     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2402   }
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2407 {
2408   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2409   PetscErrorCode ierr;
2410 
2411   PetscFunctionBegin;
2412   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2413   A->factorerrortype             = a->A->factorerrortype;
2414   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2415   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2420 {
2421   PetscErrorCode ierr;
2422   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2423   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2424 
2425   PetscFunctionBegin;
2426   if (!Y->preallocated) {
2427     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2428   } else if (!aij->nz) {
2429     PetscInt nonew = aij->nonew;
2430     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2431     aij->nonew = nonew;
2432   }
2433   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2438 {
2439   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2440   PetscErrorCode ierr;
2441 
2442   PetscFunctionBegin;
2443   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2444   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2445   if (d) {
2446     PetscInt rstart;
2447     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2448     *d += rstart/A->rmap->bs;
2449 
2450   }
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2455 {
2456   PetscFunctionBegin;
2457   *a = ((Mat_MPIBAIJ*)A->data)->A;
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 /* -------------------------------------------------------------------*/
2462 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2463                                        MatGetRow_MPIBAIJ,
2464                                        MatRestoreRow_MPIBAIJ,
2465                                        MatMult_MPIBAIJ,
2466                                 /* 4*/ MatMultAdd_MPIBAIJ,
2467                                        MatMultTranspose_MPIBAIJ,
2468                                        MatMultTransposeAdd_MPIBAIJ,
2469                                        NULL,
2470                                        NULL,
2471                                        NULL,
2472                                 /*10*/ NULL,
2473                                        NULL,
2474                                        NULL,
2475                                        MatSOR_MPIBAIJ,
2476                                        MatTranspose_MPIBAIJ,
2477                                 /*15*/ MatGetInfo_MPIBAIJ,
2478                                        MatEqual_MPIBAIJ,
2479                                        MatGetDiagonal_MPIBAIJ,
2480                                        MatDiagonalScale_MPIBAIJ,
2481                                        MatNorm_MPIBAIJ,
2482                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2483                                        MatAssemblyEnd_MPIBAIJ,
2484                                        MatSetOption_MPIBAIJ,
2485                                        MatZeroEntries_MPIBAIJ,
2486                                 /*24*/ MatZeroRows_MPIBAIJ,
2487                                        NULL,
2488                                        NULL,
2489                                        NULL,
2490                                        NULL,
2491                                 /*29*/ MatSetUp_MPIBAIJ,
2492                                        NULL,
2493                                        NULL,
2494                                        MatGetDiagonalBlock_MPIBAIJ,
2495                                        NULL,
2496                                 /*34*/ MatDuplicate_MPIBAIJ,
2497                                        NULL,
2498                                        NULL,
2499                                        NULL,
2500                                        NULL,
2501                                 /*39*/ MatAXPY_MPIBAIJ,
2502                                        MatCreateSubMatrices_MPIBAIJ,
2503                                        MatIncreaseOverlap_MPIBAIJ,
2504                                        MatGetValues_MPIBAIJ,
2505                                        MatCopy_MPIBAIJ,
2506                                 /*44*/ NULL,
2507                                        MatScale_MPIBAIJ,
2508                                        MatShift_MPIBAIJ,
2509                                        NULL,
2510                                        MatZeroRowsColumns_MPIBAIJ,
2511                                 /*49*/ NULL,
2512                                        NULL,
2513                                        NULL,
2514                                        NULL,
2515                                        NULL,
2516                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2517                                        NULL,
2518                                        MatSetUnfactored_MPIBAIJ,
2519                                        MatPermute_MPIBAIJ,
2520                                        MatSetValuesBlocked_MPIBAIJ,
2521                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2522                                        MatDestroy_MPIBAIJ,
2523                                        MatView_MPIBAIJ,
2524                                        NULL,
2525                                        NULL,
2526                                 /*64*/ NULL,
2527                                        NULL,
2528                                        NULL,
2529                                        NULL,
2530                                        NULL,
2531                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2532                                        NULL,
2533                                        NULL,
2534                                        NULL,
2535                                        NULL,
2536                                 /*74*/ NULL,
2537                                        MatFDColoringApply_BAIJ,
2538                                        NULL,
2539                                        NULL,
2540                                        NULL,
2541                                 /*79*/ NULL,
2542                                        NULL,
2543                                        NULL,
2544                                        NULL,
2545                                        MatLoad_MPIBAIJ,
2546                                 /*84*/ NULL,
2547                                        NULL,
2548                                        NULL,
2549                                        NULL,
2550                                        NULL,
2551                                 /*89*/ NULL,
2552                                        NULL,
2553                                        NULL,
2554                                        NULL,
2555                                        NULL,
2556                                 /*94*/ NULL,
2557                                        NULL,
2558                                        NULL,
2559                                        NULL,
2560                                        NULL,
2561                                 /*99*/ NULL,
2562                                        NULL,
2563                                        NULL,
2564                                        MatConjugate_MPIBAIJ,
2565                                        NULL,
2566                                 /*104*/NULL,
2567                                        MatRealPart_MPIBAIJ,
2568                                        MatImaginaryPart_MPIBAIJ,
2569                                        NULL,
2570                                        NULL,
2571                                 /*109*/NULL,
2572                                        NULL,
2573                                        NULL,
2574                                        NULL,
2575                                        MatMissingDiagonal_MPIBAIJ,
2576                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2577                                        NULL,
2578                                        MatGetGhosts_MPIBAIJ,
2579                                        NULL,
2580                                        NULL,
2581                                 /*119*/NULL,
2582                                        NULL,
2583                                        NULL,
2584                                        NULL,
2585                                        MatGetMultiProcBlock_MPIBAIJ,
2586                                 /*124*/NULL,
2587                                        MatGetColumnNorms_MPIBAIJ,
2588                                        MatInvertBlockDiagonal_MPIBAIJ,
2589                                        NULL,
2590                                        NULL,
2591                                /*129*/ NULL,
2592                                        NULL,
2593                                        NULL,
2594                                        NULL,
2595                                        NULL,
2596                                /*134*/ NULL,
2597                                        NULL,
2598                                        NULL,
2599                                        NULL,
2600                                        NULL,
2601                                /*139*/ MatSetBlockSizes_Default,
2602                                        NULL,
2603                                        NULL,
2604                                        MatFDColoringSetUp_MPIXAIJ,
2605                                        NULL,
2606                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2607 };
2608 
2609 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2610 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2611 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Implementation of
  MatMPIBAIJSetPreallocationCSR() for MATMPIBAIJ.

  ii[]/jj[] describe the local block rows in CSR form (block indices,
  0-based, ii[0] must be 0); V optionally supplies the bs*bs values of every
  block.  The routine derives exact diagonal/off-diagonal per-row
  preallocation, inserts the blocks, assembles, and then locks the nonzero
  pattern (MAT_NEW_NONZERO_LOCATION_ERR).
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
  const PetscInt *JJ    =NULL;
  PetscScalar    *values=NULL;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all quantities below are in units of blocks, not points */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  /* count, for each block row, the blocks falling inside [cstart,cend)
     (diagonal part) versus outside it (off-diagonal part) */
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros so the nonzero pattern is still defined */
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* the CSR data is purely local, so skip the off-process stash during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  /* the CSR description is exact: error if any new nonzero location appears later */
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2683 
2684 /*@C
2685    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2686 
2687    Collective
2688 
2689    Input Parameters:
2690 +  B - the matrix
2691 .  bs - the block size
2692 .  i - the indices into j for the start of each local row (starts with zero)
2693 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2694 -  v - optional values in the matrix
2695 
2696    Level: advanced
2697 
2698    Notes:
2699     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2700    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2701    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2702    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2703    block column and the second index is over columns within a block.
2704 
2705    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2706 
2707 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2708 @*/
/* Public entry point; see the manual-page comment above.  Validates the
   arguments and dispatches to the type-specific implementation composed
   under "MatMPIBAIJSetPreallocationCSR_C" (a no-op for other types). */
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2720 
2721 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2722 {
2723   Mat_MPIBAIJ    *b;
2724   PetscErrorCode ierr;
2725   PetscInt       i;
2726   PetscMPIInt    size;
2727 
2728   PetscFunctionBegin;
2729   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2730   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2731   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2732   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2733 
2734   if (d_nnz) {
2735     for (i=0; i<B->rmap->n/bs; i++) {
2736       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2737     }
2738   }
2739   if (o_nnz) {
2740     for (i=0; i<B->rmap->n/bs; i++) {
2741       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2742     }
2743   }
2744 
2745   b      = (Mat_MPIBAIJ*)B->data;
2746   b->bs2 = bs*bs;
2747   b->mbs = B->rmap->n/bs;
2748   b->nbs = B->cmap->n/bs;
2749   b->Mbs = B->rmap->N/bs;
2750   b->Nbs = B->cmap->N/bs;
2751 
2752   for (i=0; i<=b->size; i++) {
2753     b->rangebs[i] = B->rmap->range[i]/bs;
2754   }
2755   b->rstartbs = B->rmap->rstart/bs;
2756   b->rendbs   = B->rmap->rend/bs;
2757   b->cstartbs = B->cmap->rstart/bs;
2758   b->cendbs   = B->cmap->rend/bs;
2759 
2760 #if defined(PETSC_USE_CTABLE)
2761   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2762 #else
2763   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2764 #endif
2765   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2766   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2767   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2768 
2769   /* Because the B will have been resized we simply destroy it and create a new one each time */
2770   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2771   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2772   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2773   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2774   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2775   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2776 
2777   if (!B->preallocated) {
2778     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2779     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2780     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2781     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2782     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2783   }
2784 
2785   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2786   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2787   B->preallocated  = PETSC_TRUE;
2788   B->was_assembled = PETSC_FALSE;
2789   B->assembled     = PETSC_FALSE;
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2794 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2795 
/*
  MatConvert_MPIBAIJ_MPIAdj - Builds an MPIAdj adjacency matrix describing
  the *block* nonzero structure of an MPIBAIJ matrix, with each block row's
  diagonal entry removed (adjacency graphs carry no self edges).
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* row pointers: diagonal-part count plus off-diagonal-part count per block
     row, minus one if the row contains a diagonal entry */
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* subtract one from the count when this row has a diagonal entry */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* column indices, merged so that global indices stay sorted: off-diagonal
     columns before the local range first, then local (diagonal-block)
     columns, then the remaining off-diagonal columns.
     NOTE(review): the split point compares garray entries against the block
     *row* start rstart, which presumes matching row/column layouts (square
     matrix) -- confirm for rectangular use. */
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  /* MPIAdj takes ownership of ii and jj; do not free them here */
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2835 
2836 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2837 
2838 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2839 
/*
  MatConvert_MPIBAIJ_MPIAIJ - Converts an assembled MPIBAIJ matrix to MPIAIJ
  by converting its local diagonal (a->A) and off-diagonal (a->B) SeqBAIJ
  blocks to SeqAIJ and installing them in the target's Mat_MPIAIJ context.

  NOTE(review): for MAT_INITIAL_MATRIX / MAT_INPLACE_MATRIX the input A is
  temporarily disassembled (MatDisAssemble_MPIBAIJ) so a->B carries global
  column indices, and is reassembled afterwards -- A is modified and then
  restored during this call.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create an empty MPIAIJ of matching global/local sizes and block sizes */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing SeqAIJ blocks of the target */
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    /* replace the (empty) preallocated blocks with converted copies; the
       disassemble/reassemble pair puts a->B into global-index form for the
       conversion and then restores A */
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place conversion: B's contents replace A's, B itself is destroyed */
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2884 
2885 /*MC
2886    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2887 
2888    Options Database Keys:
2889 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2890 . -mat_block_size <bs> - set the blocksize used to store the matrix
2891 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> is the hash table size factor
2893 
2894    Level: beginner
2895 
2896    Notes:
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2899 
2900 .seealso: MatCreateBAIJ
2901 M*/
2902 
2903 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2904 
/*
  MatCreate_MPIBAIJ - Constructor registered for MATMPIBAIJ: allocates the
  Mat_MPIBAIJ context, installs the operations table, initializes all state
  to "empty", composes the type-specific methods and conversions queried by
  name, and processes the -mat_use_hash_table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  /* install the MATMPIBAIJ function table defined above */
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* compose type-specific methods and conversions looked up by name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  /* optional hash-table assembly: -mat_use_hash_table [fact] */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;  /* factors <= 1 would not grow the table; fall back to the default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2985 
2986 /*MC
2987    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2988 
2989    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2990    and MATMPIBAIJ otherwise.
2991 
2992    Options Database Keys:
2993 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2994 
2995   Level: beginner
2996 
2997 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2998 M*/
2999 
3000 /*@C
3001    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3002    (block compressed row).  For good matrix assembly performance
3003    the user should preallocate the matrix storage by setting the parameters
3004    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3005    performance can be increased by more than a factor of 50.
3006 
3007    Collective on Mat
3008 
3009    Input Parameters:
3010 +  B - the matrix
3011 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3012           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3013 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3014            submatrix  (same for all local rows)
3015 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
3017            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3018            set it even if it is zero.
3019 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3020            submatrix (same for all local rows).
3021 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3022            off-diagonal portion of the local submatrix (possibly different for
3023            each block row) or NULL.
3024 
3025    If the *_nnz parameter is given then the *_nz parameter is ignored
3026 
3027    Options Database Keys:
3028 +   -mat_block_size - size of the blocks to use
3029 -   -mat_use_hash_table <fact>
3030 
3031    Notes:
3032    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3034 
3035    Storage Information:
3036    For a square global matrix we define each processor's diagonal portion
3037    to be its local rows and the corresponding columns (a square submatrix);
3038    each processor's off-diagonal portion encompasses the remainder of the
3039    local matrix (a rectangular submatrix).
3040 
3041    The user can specify preallocated storage for the diagonal part of
3042    the local submatrix with either d_nz or d_nnz (not both).  Set
3043    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3044    memory allocation.  Likewise, specify preallocated storage for the
3045    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3046 
3047    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3048    the figure below we depict these three local rows and all columns (0-11).
3049 
3050 .vb
3051            0 1 2 3 4 5 6 7 8 9 10 11
3052           --------------------------
3053    row 3  |o o o d d d o o o o  o  o
3054    row 4  |o o o d d d o o o o  o  o
3055    row 5  |o o o d d d o o o o  o  o
3056           --------------------------
3057 .ve
3058 
3059    Thus, any entries in the d locations are stored in the d (diagonal)
3060    submatrix, and any entries in the o locations are stored in the
3061    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3062    stored simply in the MATSEQBAIJ format for compressed row storage.
3063 
3064    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3065    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3066    In general, for PDE problems in which most nonzeros are near the diagonal,
3067    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3068    or you will get TERRIBLE performance; see the users' manual chapter on
3069    matrices.
3070 
3071    You can call MatGetInfo() to get information on how effective the preallocation was;
3072    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3073    You can also run with the option -info and look for messages with the string
3074    malloc in them to see if additional memory allocation was needed.
3075 
3076    Level: intermediate
3077 
3078 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3079 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* collective validation: bs must be the same on every rank (performs an MPI reduction in debug builds) */
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the implementation composed on B (MatMPIBAIJSetPreallocation_MPIBAIJ
     for MPIBAIJ matrices); PetscTryMethod makes this a silent no-op for other types */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3091 
3092 /*@C
3093    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3094    (block compressed row).  For good matrix assembly performance
3095    the user should preallocate the matrix storage by setting the parameters
3096    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3097    performance can be increased by more than a factor of 50.
3098 
3099    Collective
3100 
3101    Input Parameters:
3102 +  comm - MPI communicator
3103 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3104           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3105 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3106            This value should be the same as the local size used in creating the
3107            y vector for the matrix-vector product y = Ax.
3108 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3109            This value should be the same as the local size used in creating the
3110            x vector for the matrix-vector product y = Ax.
3111 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3112 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3113 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3114            submatrix  (same for all local rows)
3115 .  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
3117            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3118            and set it even if it is zero.
3119 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3120            submatrix (same for all local rows).
3121 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3122            off-diagonal portion of the local submatrix (possibly different for
3123            each block row) or NULL.
3124 
3125    Output Parameter:
3126 .  A - the matrix
3127 
3128    Options Database Keys:
3129 +   -mat_block_size - size of the blocks to use
3130 -   -mat_use_hash_table <fact>
3131 
3132    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3133    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3134    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3135 
3136    Notes:
3137    If the *_nnz parameter is given then the *_nz parameter is ignored
3138 
   A nonzero block is any block that has 1 or more nonzeros in it
3140 
3141    The user MUST specify either the local or global matrix dimensions
3142    (possibly both).
3143 
3144    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3146 
3147    Storage Information:
3148    For a square global matrix we define each processor's diagonal portion
3149    to be its local rows and the corresponding columns (a square submatrix);
3150    each processor's off-diagonal portion encompasses the remainder of the
3151    local matrix (a rectangular submatrix).
3152 
3153    The user can specify preallocated storage for the diagonal part of
3154    the local submatrix with either d_nz or d_nnz (not both).  Set
3155    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3156    memory allocation.  Likewise, specify preallocated storage for the
3157    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3158 
3159    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3160    the figure below we depict these three local rows and all columns (0-11).
3161 
3162 .vb
3163            0 1 2 3 4 5 6 7 8 9 10 11
3164           --------------------------
3165    row 3  |o o o d d d o o o o  o  o
3166    row 4  |o o o d d d o o o o  o  o
3167    row 5  |o o o d d d o o o o  o  o
3168           --------------------------
3169 .ve
3170 
3171    Thus, any entries in the d locations are stored in the d (diagonal)
3172    submatrix, and any entries in the o locations are stored in the
3173    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3174    stored simply in the MATSEQBAIJ format for compressed row storage.
3175 
3176    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3177    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3178    In general, for PDE problems in which most nonzeros are near the diagonal,
3179    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3180    or you will get TERRIBLE performance; see the users' manual chapter on
3181    matrices.
3182 
3183    Level: intermediate
3184 
3185 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3186 @*/
3187 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3188 {
3189   PetscErrorCode ierr;
3190   PetscMPIInt    size;
3191 
3192   PetscFunctionBegin;
3193   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3194   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3195   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3196   if (size > 1) {
3197     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3198     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3199   } else {
3200     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3201     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3202   }
3203   PetscFunctionReturn(0);
3204 }
3205 
/* Duplicate an MPIBAIJ matrix: create a new matrix with the same parallel
   layout and block structure, duplicating the diagonal (A) and off-diagonal (B)
   sequential blocks and the communication scatter; numerical values are copied
   or not according to cpvalues. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* the duplicate is born fully assembled with no values pending insertion */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-size bookkeeping (bs2 = bs*bs, and local/global block counts) */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the row/column layouts with the original (reference counted) */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() work arrays are allocated lazily; start empty rather than copying */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag/factor) but start with an empty table */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
  /* copy the global-to-local column map of the off-diagonal part, if it exists */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;

  /* copy the global block-column indices of the off-diagonal part, if any */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = NULL;

  /* fresh block stash, plus duplicates of the ghost vector and its scatter context */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* duplicate the sequential diagonal and off-diagonal blocks themselves */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3288 
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Read a matrix stored in PETSc binary (AIJ-style CSR) format from viewer into
   mat: the header and row lengths are read collectively, each rank receives its
   own rows, block-wise preallocation is computed, and the entries are inserted
   point-wise through mat->ops->setvalues. */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
  PetscInt       *rowidxs,*colidxs,rs,cs,ce;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header: classid, global rows, global columns, total nonzeros */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
  if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
  /* nz < 0 flags a dense/special on-disk format that this loader cannot handle */
  if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set local sizes if not set already (square matrices: make row/column layouts agree) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);CHKERRQ(ierr);
  /* local block counts; NOTE(review): assumes bs divides m and n — presumably
     guaranteed by the layout setup with the block size from the .info file */
  mbs = m/bs; nbs = n/bs;

  /* read in row lengths and build row indices (prefix sum -> local CSR row offsets) */
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: the sum of all row lengths must equal the header nonzero count */
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);

  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij,done;
    PetscInt   *d_nnz,*o_nnz;

    ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
    ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
    /* for each block row, count distinct block columns touched by its bs point rows */
    for (i=0; i<mbs; i++) {
      ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
      ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
      for (k=0; k<bs; k++) {
        PetscInt row = bs*i + k;
        for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only the upper triangle (col >= row) is stored/counted */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              /* diagonal portion: bit set marks block columns already counted */
              if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
            } else {
              /* off-diagonal portion: hash set deduplicates global block columns */
              ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
    ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
    /* both calls go through PetscTryMethod: only the one matching mat's actual
       type (MPIBAIJ or MPISBAIJ) does anything, the other is a no-op */
    ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
  }

  /* store matrix values, one (point) row at a time */
  for (i=0; i<m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
    ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3387 
3388 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3389 {
3390   PetscErrorCode ierr;
3391   PetscBool      isbinary;
3392 
3393   PetscFunctionBegin;
3394   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3395   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3396   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3397   PetscFunctionReturn(0);
3398 }
3399 
3400 /*@
3401    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3402 
3403    Input Parameters:
3404 +  mat  - the matrix
3405 -  fact - factor
3406 
3407    Not Collective, each process can use a different factor
3408 
3409    Level: advanced
3410 
3411   Notes:
3412    This can also be set by the command line option: -mat_use_hash_table <fact>
3413 
3414 .seealso: MatSetOption()
3415 @*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forward to the implementation composed on mat (MatSetHashTableFactor_MPIBAIJ
     for MPIBAIJ matrices); PetscTryMethod makes this a silent no-op for other types */
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3424 
3425 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3426 {
3427   Mat_MPIBAIJ *baij;
3428 
3429   PetscFunctionBegin;
3430   baij          = (Mat_MPIBAIJ*)mat->data;
3431   baij->ht_fact = fact;
3432   PetscFunctionReturn(0);
3433 }
3434 
3435 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3436 {
3437   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3438   PetscBool      flg;
3439   PetscErrorCode ierr;
3440 
3441   PetscFunctionBegin;
3442   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3443   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3444   if (Ad)     *Ad     = a->A;
3445   if (Ao)     *Ao     = a->B;
3446   if (colmap) *colmap = a->garray;
3447   PetscFunctionReturn(0);
3448 }
3449 
3450 /*
3451     Special version for direct calls from Fortran (to eliminate two function call overheads
3452 */
3453 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3454 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3455 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3456 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3457 #endif
3458 
3459 /*@C
3460   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3461 
3462   Collective on Mat
3463 
3464   Input Parameters:
3465 + mat - the matrix
3466 . min - number of input rows
3467 . im - input rows
3468 . nin - number of input columns
3469 . in - input columns
3470 . v - numerical values input
3471 - addvin - INSERT_VALUES or ADD_VALUES
3472 
3473   Notes:
3474     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3475 
3476   Level: advanced
3477 
3478 .seealso:   MatSetValuesBlocked()
3479 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;    /* bs x bs scratch block, reused across calls */
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  /* lazily allocate the per-matrix block scratch buffer */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/columns of a block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block-row indices are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned block row: insert directly into the A (diagonal) or B
         (off-diagonal) sequential block */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required: gather the bs x bs block into barray */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          /* rewind barray to the start of the block just filled */
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;  /* negative block-column indices are skipped */
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
        else {
          /* off-diagonal entry: translate the global block column to B's local
             numbering via the colmap (built lazily after first assembly) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* column not present in B and new nonzeros are allowed: disassemble
               back to global numbering so the entry can be inserted */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash the values for communication
         during assembly (unless stashing was disabled) */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3590 
3591 /*@
3592      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3593          CSR format the local rows.
3594 
3595    Collective
3596 
3597    Input Parameters:
3598 +  comm - MPI communicator
3599 .  bs - the block size, only a block size of 1 is supported
3600 .  m - number of local rows (Cannot be PETSC_DECIDE)
3601 .  n - This value should be the same as the local size used in creating the
3602        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3603        calculated if N is given) For square matrices n is almost always m.
3604 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3605 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3606 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3607 .   j - column indices
3608 -   a - matrix values
3609 
3610    Output Parameter:
3611 .   mat - the matrix
3612 
3613    Level: intermediate
3614 
3615    Notes:
3616        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3617      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3618      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3619 
3620      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3621      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3622      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3623      with column-major ordering within blocks.
3624 
3625        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3626 
.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
          MATMPIBAIJ, MatCreateAIJ(), MatCreateMPIBAIJWithSplitArrays()
3629 @*/
3630 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3631 {
3632   PetscErrorCode ierr;
3633 
3634   PetscFunctionBegin;
3635   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3636   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3637   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3638   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3639   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3640   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3641   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3642   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3643   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3644   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3645   PetscFunctionReturn(0);
3646 }
3647 
3648 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3649 {
3650   PetscErrorCode ierr;
3651   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3652   PetscInt       *indx;
3653   PetscScalar    *values;
3654 
3655   PetscFunctionBegin;
3656   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3657   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3658     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3659     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3660     PetscInt       *bindx,rmax=a->rmax,j;
3661     PetscMPIInt    rank,size;
3662 
3663     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3664     mbs = m/bs; Nbs = N/cbs;
3665     if (n == PETSC_DECIDE) {
3666       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
3667     }
3668     nbs = n/cbs;
3669 
3670     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3671     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3672 
3673     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3674     ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr);
3675     if (rank == size-1) {
3676       /* Check sum(nbs) = Nbs */
3677       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3678     }
3679 
3680     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3681     for (i=0; i<mbs; i++) {
3682       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3683       nnz = nnz/bs;
3684       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3685       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3686       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3687     }
3688     ierr = PetscFree(bindx);CHKERRQ(ierr);
3689 
3690     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3691     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3692     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3693     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3694     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3695     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3696     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3697     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3698   }
3699 
3700   /* numeric phase */
3701   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3702   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3703 
3704   for (i=0; i<m; i++) {
3705     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3706     Ii   = i + rstart;
3707     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3708     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3709   }
3710   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3712   PetscFunctionReturn(0);
3713 }
3714