xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision fc8a9adeb7fcdc98711d755fa2dc544ddccf0f3e)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
/*
   MatGetRowMaxAbs_MPIBAIJ - For each locally owned row, returns the entry of
   maximum absolute value (in v) and optionally its global column index (in idx).

   The result is the elementwise maximum over the diagonal block (a->A, owned
   columns) and the off-diagonal block (a->B, ghost columns); works entirely
   on local data.

   Input:
     A   - the MPIBAIJ matrix
   Output:
     v   - vector (local length = local rows) holding the max-abs entries
     idx - optional array receiving the global column index of each max entry;
           pass NULL to skip index computation
*/
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* Max-abs over the diagonal block; idx comes back in local column numbering */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* Shift the diagonal block's local column indices to global numbering.
       NOTE(review): rows whose diagonal-block maximum is exactly zero are left
       unshifted here -- presumably because the off-diagonal merge below will
       overwrite them; confirm the behavior for rows that are entirely zero. */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* Repeat for the off-diagonal block into scratch storage */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* Merge: keep whichever block has the larger |value|.  B's packed column
     index is mapped to a global index via garray (block granularity), with
     the within-block offset (idxb % bs) re-attached. */
  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non scalable way since the
73    length of colmap equals the global matrix length.
74 */
75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
76 {
77   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
78   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
79   PetscErrorCode ierr;
80   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
81 
82   PetscFunctionBegin;
83 #if defined(PETSC_USE_CTABLE)
84   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
85   for (i=0; i<nbs; i++) {
86     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
87   }
88 #else
89   ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
90   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
91   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
92   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
93 #endif
94   PetscFunctionReturn(0);
95 }
96 
/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)

   Insert or add the single scalar `value` at point position (row,col) of the
   diagonal block A.  row/col are LOCAL point (not block) indices; orow/ocol
   are the global indices, used only in error messages.

   Relies on many variables declared and initialized by the caller
   (MatSetValues_MPIBAIJ): bs, bs2, a, aa, ai, aj, aimax, ailen and the
   scratch variables rp, ap, bap, brow, bcol, ridx, cidx, low, high, t,
   _i, ii, nrow, rmax, N, ierr.

   Strategy: short bisection to narrow the search window within the block row,
   then a linear scan.  If the block entry is absent and a->nonew allows
   insertion, the row is (re)allocated via MatSeqXAIJReallocateAIJ and the
   later block entries are shifted up one slot to make room.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
135 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)

   Mirror of MatSetValues_SeqBAIJ_A_Private for the OFF-DIAGONAL block B:
   insert or add the single scalar `value` at local point position (row,col)
   of B.  orow/ocol are the global indices, used only in error messages.

   Relies on caller-declared variables: bs, bs2, b, ba, bi, bj, bimax, bilen
   and the shared scratch variables rp, ap, bap, brow, bcol, ridx, cidx,
   low, high, t, _i, ii, nrow, rmax, N, ierr.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
173 
/*
   MatSetValues_MPIBAIJ - Point-wise (non-blocked) MatSetValues() for MPIBAIJ.

   im[]/in[] are GLOBAL point row/column indices; v[] holds the values, laid
   out row- or column-oriented according to baij->roworiented.  Locally owned
   rows go directly into the diagonal block A (owned columns) or the
   off-diagonal block B (ghost columns); off-process rows are stashed for the
   communication performed in MatAssemblyBegin/End.  Negative indices are
   silently skipped.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;    /* ownership range in point numbering */
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  /* Scratch shared with MatSetValues_SeqBAIJ_{A,B}_Private() */
  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* Locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* Owned column: goes into the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* Ghost column: goes into the off-diagonal block B */
          if (mat->was_assembled) {
            /* After an assembly B is compressed; translate the global column
               through colmap (1-based; 0/-1 after the -1 below means absent) */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* New ghost column while insertions are allowed: expand B back
                 to global column numbering and refresh the cached pointers
                 the B macro uses */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs; /* colmap is block-based: re-attach the within-block offset */
          } else col = in[j]; /* before first assembly B uses global column numbering */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* Off-process row: stash for MatAssemblyBegin/End communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
261 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - Insert or add ONE bs x bs block of
   values v[] at block position (row,col) of a SeqBAIJ matrix.

   row/col are LOCAL block indices; orow/ocol are the global block indices,
   used only in error messages.  v[] is interpreted row- or column-oriented
   according to a->roworiented; internal storage is column-major within each
   block.  If the block is absent and a->nonew permits, the block row is
   reallocated and later blocks shifted up to make room.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];      /* block-column indices of this block row */
  ap   = aa + bs2*ai[row];  /* first stored block of this block row */
  rmax = imax[row];         /* allocated blocks in this row */
  nrow = ailen[row];        /* used blocks in this row */
  value = v;                /* (redundant: already initialized at declaration) */
  low = 0;
  high = nrow;
  /* Bisect to a window of <= 7 candidates, then scan linearly */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* Existing block: copy/accumulate v into it.  Row-oriented input is
         transposed on the fly (stride-bs writes) into column-major storage */
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently ignore new blocks */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++; /* row grew by one block; widen the scan window */
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    /* clear the freed slot before filling it */
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
355 
/*
    MatSetValuesBlocked_MPIBAIJ - Blocked MatSetValues() for MPIBAIJ: im[]/in[]
    are GLOBAL block row/column indices, and v[] holds m*n blocks of bs*bs
    values each.

    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;  /* bs2-sized scratch block, cached on the matrix */
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;  /* ownership range in BLOCK numbering */
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the scratch block used to repack strided input */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v[] */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required: the block is already
           contiguous in v, so alias it (barray is only read below; the cast
           away from const is never written through in these two branches) */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* owned block column -> diagonal block A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* ghost block column -> off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
            /* colmap stores i*bs+1; a value not divisible by bs after the -1
               indicates a corrupted map */
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs; /* back to a local block-column index */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new ghost block while insertions allowed: expand B back to
                 global block-column numbering */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j]; /* before first assembly B uses global numbering */
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for MatAssemblyBegin/End */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
456 
/* Multiplicative hashing (golden-ratio constant): h = floor(size * frac(key*phi)).
   `tmp` must be a PetscReal lvalue used as scratch for the fractional part. */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Hash-table version of point-wise MatSetValues().
   The nonzero structure is FROZEN: the (block row, block col) pair of every
   local entry must already be present in the hash table built by
   MatCreateHashTable_MPIBAIJ_Private(); otherwise an error is raised.
   Lookup is by linear probing from HASH(key), wrapping around once.
   Off-process rows are stashed as usual.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  /* NOTE(review): these are seeded from the running totals and then ADDED
     back at the end (baij->ht_total_ct += total_ct), which double-counts the
     previous totals on every call -- presumably the intent was to seed with
     0 or to assign at the end; confirm. */
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up PetscInto the Hash Table: key encodes the (block row, block
           col) pair; +1 keeps 0 free as the empty-slot marker */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe to end of table, then wrap to just before h1 */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address
           (HD[idx] points at the bs x bs block, stored column-major) */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
531 
532 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
533 {
534   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
535   PetscBool         roworiented = baij->roworiented;
536   PetscErrorCode    ierr;
537   PetscInt          i,j,ii,jj,row,col;
538   PetscInt          rstart=baij->rstartbs;
539   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
540   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
541   PetscReal         tmp;
542   MatScalar         **HD = baij->hd,*baij_a;
543   const PetscScalar *v_t,*value;
544 #if defined(PETSC_USE_DEBUG)
545   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
546 #endif
547 
548   PetscFunctionBegin;
549   if (roworiented) stepval = (n-1)*bs;
550   else stepval = (m-1)*bs;
551 
552   for (i=0; i<m; i++) {
553 #if defined(PETSC_USE_DEBUG)
554     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
555     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
556 #endif
557     row = im[i];
558     v_t = v + i*nbs2;
559     if (row >= rstart && row < rend) {
560       for (j=0; j<n; j++) {
561         col = in[j];
562 
563         /* Look up into the Hash Table */
564         key = row*Nbs+col+1;
565         h1  = HASH(size,key,tmp);
566 
567         idx = h1;
568 #if defined(PETSC_USE_DEBUG)
569         total_ct++;
570         insert_ct++;
571         if (HT[idx] != key) {
572           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
573           if (idx == size) {
574             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
575             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
576           }
577         }
578 #else
579         if (HT[idx] != key) {
580           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
581           if (idx == size) {
582             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
583             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
584           }
585         }
586 #endif
587         baij_a = HD[idx];
588         if (roworiented) {
589           /*value = v + i*(stepval+bs)*bs + j*bs;*/
590           /* value = v + (i*(stepval+bs)+j)*bs; */
591           value = v_t;
592           v_t  += bs;
593           if (addv == ADD_VALUES) {
594             for (ii=0; ii<bs; ii++,value+=stepval) {
595               for (jj=ii; jj<bs2; jj+=bs) {
596                 baij_a[jj] += *value++;
597               }
598             }
599           } else {
600             for (ii=0; ii<bs; ii++,value+=stepval) {
601               for (jj=ii; jj<bs2; jj+=bs) {
602                 baij_a[jj] = *value++;
603               }
604             }
605           }
606         } else {
607           value = v + j*(stepval+bs)*bs + i*bs;
608           if (addv == ADD_VALUES) {
609             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
610               for (jj=0; jj<bs; jj++) {
611                 baij_a[jj] += *value++;
612               }
613             }
614           } else {
615             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
616               for (jj=0; jj<bs; jj++) {
617                 baij_a[jj] = *value++;
618               }
619             }
620           }
621         }
622       }
623     } else {
624       if (!baij->donotstash) {
625         if (roworiented) {
626           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
627         } else {
628           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
629         }
630       }
631     }
632   }
633 #if defined(PETSC_USE_DEBUG)
634   baij->ht_total_ct  += total_ct;
635   baij->ht_insert_ct += insert_ct;
636 #endif
637   PetscFunctionReturn(0);
638 }
639 
/*
   MatGetValues_MPIBAIJ - Retrieve an m x n logically dense subblock of values
   into v[] (row-major).  idxm[]/idxn[] are GLOBAL point indices.  Only rows
   owned by this process are supported; negative indices are skipped (the
   corresponding v entry is left untouched).  Entries in ghost columns not
   present in the off-diagonal block B return 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* owned column: read from the diagonal block A */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* ghost column: translate through colmap (block-based, 1-shifted;
             0 -> -1 after the decrement means "block not present in B") */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* absent block (or stale colmap entry): the value is a structural zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* re-attach within-block column offset */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
680 
/*
   MatNorm_MPIBAIJ - Computes a matrix norm.  Supports NORM_FROBENIUS,
   NORM_1 (max column sum) and NORM_INFINITY (max row sum).  Collective:
   each rank reduces its local contribution with MPIU_Allreduce.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single rank: defer entirely to the sequential implementation */
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over every stored scalar of both blocks, then sqrt */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart is taken from baij->rstartbs (block ROW start);
         presumably the block COLUMN start (cstartbs) is intended here --
         they coincide for square matrices with matching layouts; confirm
         for rectangular layouts. */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* accumulate |a_ij| per global column; blocks are stored column-major,
         so the inner `row` loop walks one block column */
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j; /* B's columns map to global via garray */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      /* per block row: sum |a_ij| of each of the bs point rows over A and B
         (A and B share the row layout, so bmat->i is indexed by the same j) */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
772 
773 /*
774   Creates the hash table, and sets the table
775   This table is created only once.
776   If new entried need to be added to the matrix
777   then the hash table has to be destroyed and
778   recreated.
779 */
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;          /* ct: total probe collisions, max: longest probe chain */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);   /* table already exists; build it only once */

  /* table size is a factor (>1) times the number of stored blocks, to keep load below 1 */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table; HD holds pointers to the bs2-sized value blocks, HT the keys */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A: insert every diagonal-block entry, keyed by its global (block row, block col).
     Collisions are resolved by linear probing; key 0 marks an empty slot, hence the +1 offset. */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: same insertion, but off-diagonal block columns map to global
     block columns through garray */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary: occupied slots and average/maximum probe length */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
864 
/*
   Starts the communication of stashed off-process entries (both the scalar
   stash and the block stash); the matching receives are drained in
   MatAssemblyEnd_MPIBAIJ().
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  /* nothing to communicate if off-process entries were never stashed */
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
882 
/*
   Completes assembly: receives and inserts stashed off-process entries,
   assembles the local diagonal (A) and off-diagonal (B) blocks, handles
   collective disassembly, and optionally builds the hash table for fast
   subsequent insertion.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash: each message carries (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each block entry occupies bs2 scalars in val */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of the was_assembled booleans: result is false iff some rank disassembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build lvec/Mvctx for matrix-vector products */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, build the hash table and switch to the hash-table insertion routines */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release MatGetRow() work space; it is rebuilt on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
996 
997 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
998 #include <petscdraw.h>
/*
   Viewer backend for ASCII, draw and socket viewers.  The INFO formats print
   only per-process summaries; otherwise the full matrix is gathered onto
   process 0 and viewed there as a sequential matrix.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* synchronized per-rank summary of local sizes and nonzero counts */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns all rows of the gathered copy; the others own none */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global row indices of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block at a time (a is column-ordered) */
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* off-diagonal block columns map to global columns through garray */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* on rank 0 the gathered matrix lives entirely in its local diagonal block */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1116 
1117 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1118 {
1119   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1120   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1121   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1122   PetscErrorCode ierr;
1123   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1124   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1125   int            fd;
1126   PetscScalar    *column_values;
1127   FILE           *file;
1128   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1129   PetscInt       message_count,flowcontrolcount;
1130 
1131   PetscFunctionBegin;
1132   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1133   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1134   nz   = bs2*(A->nz + B->nz);
1135   rlen = mat->rmap->n;
1136   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1137   if (!rank) {
1138     header[0] = MAT_FILE_CLASSID;
1139     header[1] = mat->rmap->N;
1140     header[2] = mat->cmap->N;
1141 
1142     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1143     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1144     /* get largest number of rows any processor has */
1145     range = mat->rmap->range;
1146     for (i=1; i<size; i++) {
1147       rlen = PetscMax(rlen,range[i+1] - range[i]);
1148     }
1149   } else {
1150     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1151   }
1152 
1153   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1154   /* compute lengths of each row  */
1155   for (i=0; i<a->mbs; i++) {
1156     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1157   }
1158   /* store the row lengths to the file */
1159   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1160   if (!rank) {
1161     MPI_Status status;
1162     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1163     rlen = (range[1] - range[0])/bs;
1164     for (i=0; i<rlen; i++) {
1165       for (j=0; j<bs; j++) {
1166         row_lens[i*bs+j] = bs*crow_lens[i];
1167       }
1168     }
1169     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1170     for (i=1; i<size; i++) {
1171       rlen = (range[i+1] - range[i])/bs;
1172       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1173       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1174       for (k=0; k<rlen; k++) {
1175         for (j=0; j<bs; j++) {
1176           row_lens[k*bs+j] = bs*crow_lens[k];
1177         }
1178       }
1179       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     }
1181     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1182     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1183   } else {
1184     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1185     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1186     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1187   }
1188   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1189 
1190   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1191      information needed to make it for each row from a block row. This does require more communication but still not more than
1192      the communication needed for the nonzero values  */
1193   nzmax = nz; /*  space a largest processor needs */
1194   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1195   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1196   cnt   = 0;
1197   for (i=0; i<a->mbs; i++) {
1198     pcnt = cnt;
1199     for (j=B->i[i]; j<B->i[i+1]; j++) {
1200       if ((col = garray[B->j[j]]) > cstart) break;
1201       for (l=0; l<bs; l++) {
1202         column_indices[cnt++] = bs*col+l;
1203       }
1204     }
1205     for (k=A->i[i]; k<A->i[i+1]; k++) {
1206       for (l=0; l<bs; l++) {
1207         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1208       }
1209     }
1210     for (; j<B->i[i+1]; j++) {
1211       for (l=0; l<bs; l++) {
1212         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1213       }
1214     }
1215     len = cnt - pcnt;
1216     for (k=1; k<bs; k++) {
1217       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1218       cnt += len;
1219     }
1220   }
1221   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1222 
1223   /* store the columns to the file */
1224   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1225   if (!rank) {
1226     MPI_Status status;
1227     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1228     for (i=1; i<size; i++) {
1229       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1230       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1231       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1232       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1233     }
1234     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1235   } else {
1236     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1237     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1238     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1240   }
1241   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1242 
1243   /* load up the numerical values */
1244   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1245   cnt  = 0;
1246   for (i=0; i<a->mbs; i++) {
1247     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1248     for (j=B->i[i]; j<B->i[i+1]; j++) {
1249       if (garray[B->j[j]] > cstart) break;
1250       for (l=0; l<bs; l++) {
1251         for (ll=0; ll<bs; ll++) {
1252           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1253         }
1254       }
1255       cnt += bs;
1256     }
1257     for (k=A->i[i]; k<A->i[i+1]; k++) {
1258       for (l=0; l<bs; l++) {
1259         for (ll=0; ll<bs; ll++) {
1260           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1261         }
1262       }
1263       cnt += bs;
1264     }
1265     for (; j<B->i[i+1]; j++) {
1266       for (l=0; l<bs; l++) {
1267         for (ll=0; ll<bs; ll++) {
1268           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1269         }
1270       }
1271       cnt += bs;
1272     }
1273     cnt += (bs-1)*rlen;
1274   }
1275   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1276 
1277   /* store the column values to the file */
1278   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1279   if (!rank) {
1280     MPI_Status status;
1281     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1282     for (i=1; i<size; i++) {
1283       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1284       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1285       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1286       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1287     }
1288     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1289   } else {
1290     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1291     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1292     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1293     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1294   }
1295   ierr = PetscFree(column_values);CHKERRQ(ierr);
1296 
1297   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1298   if (file) {
1299     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1305 {
1306   PetscErrorCode ierr;
1307   PetscBool      iascii,isdraw,issocket,isbinary;
1308 
1309   PetscFunctionBegin;
1310   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1311   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1312   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1313   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1314   if (iascii || isdraw || issocket) {
1315     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1316   } else if (isbinary) {
1317     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
/*
   Destroys an MPIBAIJ matrix: frees the two sequential blocks, the column
   map, the communication machinery, all work space, and removes the composed
   methods registered at creation/conversion time.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);   /* hash-table storage, if it was built */
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* drop the type name and unregister all composed methods */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1365 
1366 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1367 {
1368   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1369   PetscErrorCode ierr;
1370   PetscInt       nt;
1371 
1372   PetscFunctionBegin;
1373   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1374   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1375   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1376   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1377   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1378   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1379   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1380   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1385 {
1386   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1387   PetscErrorCode ierr;
1388 
1389   PetscFunctionBegin;
1390   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1391   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1392   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1393   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   /* do nondiagonal part */
1404   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1405   /* do local part */
1406   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1407   /* add partial results together */
1408   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1409   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1414 {
1415   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   /* do nondiagonal part */
1420   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1421   /* do local part */
1422   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1423   /* add partial results together */
1424   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1425   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 /*
1430   This only works correctly for square matrices where the subblock A->A is the
1431    diagonal block
1432 */
1433 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1434 {
1435   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1436   PetscErrorCode ierr;
1437 
1438   PetscFunctionBegin;
1439   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1440   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1445 {
1446   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1447   PetscErrorCode ierr;
1448 
1449   PetscFunctionBegin;
1450   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1451   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
/*
   Returns one locally owned (scalar) row of the matrix, with global column
   indices in increasing order, by merging the row pieces from the diagonal
   (A) and off-diagonal (B) blocks.  Must be paired with MatRestoreRow.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;   /* local row index */

  /* only request from A/B the pieces the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;   /* local off-diagonal block column -> global block column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark marks where B's columns cross from left of the diagonal block to right of it */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      /* empty row */
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1531 
1532 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1533 {
1534   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1535 
1536   PetscFunctionBegin;
1537   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1538   baij->getrowactive = PETSC_FALSE;
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1543 {
1544   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1545   PetscErrorCode ierr;
1546 
1547   PetscFunctionBegin;
1548   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1549   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1554 {
1555   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1556   Mat            A  = a->A,B = a->B;
1557   PetscErrorCode ierr;
1558   PetscReal      isend[5],irecv[5];
1559 
1560   PetscFunctionBegin;
1561   info->block_size = (PetscReal)matin->rmap->bs;
1562 
1563   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1564 
1565   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1566   isend[3] = info->memory;  isend[4] = info->mallocs;
1567 
1568   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1569 
1570   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1571   isend[3] += info->memory;  isend[4] += info->mallocs;
1572 
1573   if (flag == MAT_LOCAL) {
1574     info->nz_used      = isend[0];
1575     info->nz_allocated = isend[1];
1576     info->nz_unneeded  = isend[2];
1577     info->memory       = isend[3];
1578     info->mallocs      = isend[4];
1579   } else if (flag == MAT_GLOBAL_MAX) {
1580     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1581 
1582     info->nz_used      = irecv[0];
1583     info->nz_allocated = irecv[1];
1584     info->nz_unneeded  = irecv[2];
1585     info->memory       = irecv[3];
1586     info->mallocs      = irecv[4];
1587   } else if (flag == MAT_GLOBAL_SUM) {
1588     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1589 
1590     info->nz_used      = irecv[0];
1591     info->nz_allocated = irecv[1];
1592     info->nz_unneeded  = irecv[2];
1593     info->memory       = irecv[3];
1594     info->mallocs      = irecv[4];
1595   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1596   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1597   info->fill_ratio_needed = 0;
1598   info->factor_mallocs    = 0;
1599   PetscFunctionReturn(0);
1600 }
1601 
/*
   MatSetOption_MPIBAIJ - Sets an option on a parallel BAIJ matrix.  Most
   options are forwarded to the sequential diagonal (a->A) and/or
   off-diagonal (a->B) blocks; a few only update local bookkeeping flags.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-pattern handling options: forwarded to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* remember the orientation locally as well as in both blocks */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, entries destined for other ranks are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* hash table sizing factor */
    break;
  /* symmetry-type options: forwarded only to the diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1648 
/*
   MatTranspose_MPIBAIJ - Forms the transpose of A by creating (or reusing) a
   destination matrix B and inserting every stored block of A at its
   transposed position via MatSetValues_MPIBAIJ.  For MAT_INPLACE_MATRIX the
   freshly built transpose replaces A through MatHeaderMerge.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* build a new matrix with swapped dimensions (N x M) and the same type */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  /* rvals holds the bs consecutive global (scalar) row indices of block-row i;
     these become column indices in the transpose */
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* global scalar column of the first column of block j; becomes a row of B */
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert one scalar row of B: the bs values a[0..bs-1] at (col, rvals[]) */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* garray maps local block columns of the off-diagonal part to global block columns */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* MAT_INPLACE_MATRIX: replace A's innards with B's */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1712 
/*
   MatDiagonalScale_MPIBAIJ - Scales the matrix as diag(ll) * mat * diag(rr);
   either vector may be NULL to skip that side.  The right-scaling values for
   off-process columns are obtained by scattering rr into baij->lvec, and the
   scatter is overlapped with the purely local left scaling.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling uses only local rows, so it can be applied to B right away */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1743 
1744 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1745 {
1746   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1747   PetscInt      *lrows;
1748   PetscInt       r, len;
1749   PetscBool      cong;
1750   PetscErrorCode ierr;
1751 
1752   PetscFunctionBegin;
1753   /* get locally owned rows */
1754   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1755   /* fix right hand side if needed */
1756   if (x && b) {
1757     const PetscScalar *xx;
1758     PetscScalar       *bb;
1759 
1760     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1761     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1762     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1763     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1764     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1765   }
1766 
1767   /* actually zap the local rows */
1768   /*
1769         Zero the required rows. If the "diagonal block" of the matrix
1770      is square and the user wishes to set the diagonal we use separate
1771      code so that MatSetValues() is not called for each diagonal allocating
1772      new memory, thus calling lots of mallocs and slowing things down.
1773 
1774   */
1775   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1776   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1777   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1778   if ((diag != 0.0) && cong) {
1779     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1780   } else if (diag != 0.0) {
1781     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1782     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1783        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1784     for (r = 0; r < len; ++r) {
1785       const PetscInt row = lrows[r] + A->rmap->rstart;
1786       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1787     }
1788     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1789     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1790   } else {
1791     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1792   }
1793   ierr = PetscFree(lrows);CHKERRQ(ierr);
1794 
1795   /* only change matrix nonzero state if pattern was allowed to be changed */
1796   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1797     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1798     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1799   }
1800   PetscFunctionReturn(0);
1801 }
1802 
/*
   MatZeroRowsColumns_MPIBAIJ - Zeros the given global rows AND the matching
   columns, optionally placing 'diag' on the diagonal, and (when x and b are
   given) moving the eliminated column contributions to the right hand side.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed:
     roots never touched by a leaf keep their initial -1, while any root that
     receives a contribution becomes nonnegative under MPI_LOR */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build lmask: nonzero for every ghost column whose owning row is eliminated */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost values of x are needed to move eliminated columns to the rhs */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* one scalar row inside each of the (i[row/bs+1]-i[row/bs]) blocks of this block row */
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;  /* stride bs: step to the same scalar row in the next block column */
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  /* NOTE(review): l->B is sequential, so l->B->rmap->N is this rank's scalar row count */
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1898 
1899 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1900 {
1901   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1910 
1911 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1912 {
1913   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1914   Mat            a,b,c,d;
1915   PetscBool      flg;
1916   PetscErrorCode ierr;
1917 
1918   PetscFunctionBegin;
1919   a = matA->A; b = matA->B;
1920   c = matB->A; d = matB->B;
1921 
1922   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1923   if (flg) {
1924     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1925   }
1926   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1931 {
1932   PetscErrorCode ierr;
1933   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1934   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1935 
1936   PetscFunctionBegin;
1937   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1938   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1939     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1940   } else {
1941     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1942     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1943   }
1944   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1949 {
1950   PetscErrorCode ierr;
1951 
1952   PetscFunctionBegin;
1953   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1958 {
1959   PetscErrorCode ierr;
1960   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1961   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1962   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1963 
1964   PetscFunctionBegin;
1965   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1966   PetscFunctionReturn(0);
1967 }
1968 
/*
   MatAXPY_MPIBAIJ - Computes Y = a*X + Y.  Three strategies depending on the
   relationship between the nonzero patterns:
     SAME_NONZERO_PATTERN   - BLAS axpy directly on the stored values of both blocks
     SUBSET_NONZERO_PATTERN - generic basic implementation
     otherwise              - build a correctly preallocated union matrix and
                              replace Y's internals with it
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: the value arrays line up, so axpy over nz*bs2 scalars */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* unrelated patterns: preallocate a new matrix for the union pattern */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* replace Y's guts with B while keeping the user's Mat handle valid */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2012 
2013 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2014 {
2015   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2020   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2025 {
2026   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2027   PetscErrorCode ierr;
2028 
2029   PetscFunctionBegin;
2030   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2031   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2036 {
2037   PetscErrorCode ierr;
2038   IS             iscol_local;
2039   PetscInt       csize;
2040 
2041   PetscFunctionBegin;
2042   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2043   if (call == MAT_REUSE_MATRIX) {
2044     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2045     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2046   } else {
2047     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2048   }
2049   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2050   if (call == MAT_INITIAL_MATRIX) {
2051     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2052     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2053   }
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 /*
2058   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2059   in local and then by concatenating the local matrices the end result.
2060   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
2061   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2062 */
/*
   MatCreateSubMatrix_MPIBAIJ_Private - Extracts a parallel submatrix by first
   building a sequential copy of the requested rows/columns on each process
   (Mreuse) and then assembling those pieces into a new parallel matrix M.
   csize is the requested local column size of M, or PETSC_DECIDE.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggey */
  /* convert scalar index sets to block index sets */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* reuse the sequential scratch matrix stashed on *newmat by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* from here on, m and n count block rows/columns */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    /* prefix-sum the local column counts to get this rank's column ownership range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    /* this routine serves both BAIJ and SBAIJ; the preallocation call that
       does not match M's actual type is expected to be a no-op */
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    /* NOTE(review): ml is a scalar row count while m was divided by bs above;
       for bs > 1 this check looks like it would spuriously fail -- verify */
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* copy the rows of the sequential scratch matrix into M, one block row at a time */
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2187 
2188 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2189 {
2190   MPI_Comm       comm,pcomm;
2191   PetscInt       clocal_size,nrows;
2192   const PetscInt *rows;
2193   PetscMPIInt    size;
2194   IS             crowp,lcolp;
2195   PetscErrorCode ierr;
2196 
2197   PetscFunctionBegin;
2198   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2199   /* make a collective version of 'rowp' */
2200   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2201   if (pcomm==comm) {
2202     crowp = rowp;
2203   } else {
2204     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2205     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2206     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2207     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2208   }
2209   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2210   /* make a local version of 'colp' */
2211   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2212   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2213   if (size==1) {
2214     lcolp = colp;
2215   } else {
2216     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2217   }
2218   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2219   /* now we just get the submatrix */
2220   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2221   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2222   /* clean up */
2223   if (pcomm!=comm) {
2224     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2225   }
2226   if (size>1) {
2227     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2233 {
2234   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2235   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2236 
2237   PetscFunctionBegin;
2238   if (nghosts) *nghosts = B->nbs;
2239   if (ghosts) *ghosts = baij->garray;
2240   PetscFunctionReturn(0);
2241 }
2242 
/*
   MatGetSeqNonzeroStructure_MPIBAIJ - Builds, on every process, a sequential AIJ
   matrix holding the nonzero structure (block level, values unset) of the entire
   parallel matrix.

   Input Parameter:
.  A - the MPIBAIJ matrix

   Output Parameter:
.  newmat - new SEQAIJ matrix of global block dimensions, same structure on all ranks

   Notes: two MPI_Allgatherv passes are used - first the per-block-row lengths,
   then the column indices themselves, written by each rank directly into its own
   slice of the result's j[] array.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[] has one entry per GLOBAL block row; each rank fills its own rows
     with nz(diagonal part) + nz(off-diagonal part) */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  /* single allocation: first half recvcounts, second half displs */
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  /* write this rank's column indices directly into its slice of b->j,
     merging diagonal and off-diagonal parts in ascending global order */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  /* recvcounts/displs are reused, now measured in nonzeros rather than rows */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information so the structure can be exploited downstream */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2361 
/*
   MatSOR_MPIBAIJ - SOR/relaxation for MPIBAIJ matrices.  Only the "local" variants
   (SOR_APPLY_UPPER and SOR_LOCAL_{SYMMETRIC,FORWARD,BACKWARD}_SWEEP) are supported.
   Each outer iteration scatters the current solution's ghost values, forms the
   updated right-hand side bb1 = bb - B*x using the off-diagonal block, then runs
   the sequential SOR kernel on the diagonal block.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is only needed when the while-loops below execute at least once;
     '~flag & SOR_ZERO_INITIAL_GUESS' is nonzero exactly when the flag does
     NOT contain SOR_ZERO_INITIAL_GUESS */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* x starts as zero, so the first iteration can use bb directly */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* bring in the off-process entries of x needed by B */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2432 
2433 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2434 {
2435   PetscErrorCode ierr;
2436   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2437   PetscInt       N,i,*garray = aij->garray;
2438   PetscInt       ib,jb,bs = A->rmap->bs;
2439   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2440   MatScalar      *a_val = a_aij->a;
2441   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2442   MatScalar      *b_val = b_aij->a;
2443   PetscReal      *work;
2444 
2445   PetscFunctionBegin;
2446   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2447   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2448   if (type == NORM_2) {
2449     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2450       for (jb=0; jb<bs; jb++) {
2451         for (ib=0; ib<bs; ib++) {
2452           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2453           a_val++;
2454         }
2455       }
2456     }
2457     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2458       for (jb=0; jb<bs; jb++) {
2459         for (ib=0; ib<bs; ib++) {
2460           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2461           b_val++;
2462         }
2463       }
2464     }
2465   } else if (type == NORM_1) {
2466     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2467       for (jb=0; jb<bs; jb++) {
2468         for (ib=0; ib<bs; ib++) {
2469           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2470           a_val++;
2471         }
2472       }
2473     }
2474     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2475       for (jb=0; jb<bs; jb++) {
2476        for (ib=0; ib<bs; ib++) {
2477           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2478           b_val++;
2479         }
2480       }
2481     }
2482   } else if (type == NORM_INFINITY) {
2483     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2484       for (jb=0; jb<bs; jb++) {
2485         for (ib=0; ib<bs; ib++) {
2486           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2487           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2488           a_val++;
2489         }
2490       }
2491     }
2492     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2493       for (jb=0; jb<bs; jb++) {
2494         for (ib=0; ib<bs; ib++) {
2495           int col = garray[b_aij->j[i]] * bs + jb;
2496           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2497           b_val++;
2498         }
2499       }
2500     }
2501   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2502   if (type == NORM_INFINITY) {
2503     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2504   } else {
2505     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2506   }
2507   ierr = PetscFree(work);CHKERRQ(ierr);
2508   if (type == NORM_2) {
2509     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2510   }
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2515 {
2516   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2517   PetscErrorCode ierr;
2518 
2519   PetscFunctionBegin;
2520   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2521   A->factorerrortype             = a->A->factorerrortype;
2522   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2523   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2528 {
2529   PetscErrorCode ierr;
2530   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2531   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2532 
2533   PetscFunctionBegin;
2534   if (!Y->preallocated) {
2535     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2536   } else if (!aij->nz) {
2537     PetscInt nonew = aij->nonew;
2538     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2539     aij->nonew = nonew;
2540   }
2541   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2546 {
2547   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2548   PetscErrorCode ierr;
2549 
2550   PetscFunctionBegin;
2551   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2552   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2553   if (d) {
2554     PetscInt rstart;
2555     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2556     *d += rstart/A->rmap->bs;
2557 
2558   }
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2563 {
2564   PetscFunctionBegin;
2565   *a = ((Mat_MPIBAIJ*)A->data)->A;
2566   PetscFunctionReturn(0);
2567 }
2568 
/* -------------------------------------------------------------------*/
/* Operation dispatch table for MATMPIBAIJ.  Entries are positional (the slot
   indices are noted in the comments); a 0 entry means the operation is not
   implemented for this matrix type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ 0,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       0,
                                       MatZeroRowsColumns_MPIBAIJ,
                                /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       0,
                                       0,
                                /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*74*/ 0,
                                       MatFDColoringApply_BAIJ,
                                       0,
                                       0,
                                       0,
                                /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPIBAIJ,
                                /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*104*/0,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPIBAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
                                       0,
                                       MatGetGhosts_MPIBAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                /*124*/0,
                                       MatGetColumnNorms_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_Default,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
};
2716 
2717 
2718 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2719 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2720 
/*
   MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Type-specific implementation of
   MatMPIBAIJSetPreallocationCSR(): preallocates from block-CSR arrays (ii,jj)
   and inserts the values (zeros when V is NULL).

   Notes: ii[] and jj[] are in units of bs x bs BLOCKS, not scalar rows/columns;
   when V is given it supplies bs*bs scalars per block whose interpretation
   depends on the matrix's row-oriented option.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all quantities below are in block units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  /* count, per block row, how many blocks fall in the diagonal (d) versus
     off-diagonal (o) part so both can be preallocated exactly */
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* with no values supplied, insert zeros from a single scratch buffer big
     enough for the longest row */
  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* the preallocation is exact, so any later new nonzero is a user error */
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2787 
2788 /*@C
2789    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2790    (the default parallel PETSc format).
2791 
2792    Collective on MPI_Comm
2793 
2794    Input Parameters:
2795 +  B - the matrix
2796 .  bs - the block size
2797 .  i - the indices into j for the start of each local row (starts with zero)
2798 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2799 -  v - optional values in the matrix
2800 
2801    Level: developer
2802 
2803    Notes:
2804     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2805    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2806    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2807    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2808    block column and the second index is over columns within a block.
2809 
.keywords: matrix, baij, block, compressed row, sparse, parallel
2811 
2812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2813 @*/
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);  /* bs must agree across all ranks */
  /* dispatch to the type-specific implementation; a no-op if the matrix type
     does not register "MatMPIBAIJSetPreallocationCSR_C" */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2825 
2826 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2827 {
2828   Mat_MPIBAIJ    *b;
2829   PetscErrorCode ierr;
2830   PetscInt       i;
2831   PetscMPIInt    size;
2832 
2833   PetscFunctionBegin;
2834   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2835   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2836   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2837   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2838 
2839   if (d_nnz) {
2840     for (i=0; i<B->rmap->n/bs; i++) {
2841       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2842     }
2843   }
2844   if (o_nnz) {
2845     for (i=0; i<B->rmap->n/bs; i++) {
2846       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2847     }
2848   }
2849 
2850   b      = (Mat_MPIBAIJ*)B->data;
2851   b->bs2 = bs*bs;
2852   b->mbs = B->rmap->n/bs;
2853   b->nbs = B->cmap->n/bs;
2854   b->Mbs = B->rmap->N/bs;
2855   b->Nbs = B->cmap->N/bs;
2856 
2857   for (i=0; i<=b->size; i++) {
2858     b->rangebs[i] = B->rmap->range[i]/bs;
2859   }
2860   b->rstartbs = B->rmap->rstart/bs;
2861   b->rendbs   = B->rmap->rend/bs;
2862   b->cstartbs = B->cmap->rstart/bs;
2863   b->cendbs   = B->cmap->rend/bs;
2864 
2865 #if defined(PETSC_USE_CTABLE)
2866   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2867 #else
2868   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2869 #endif
2870   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2871   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2872   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2873 
2874   /* Because the B will have been resized we simply destroy it and create a new one each time */
2875   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2876   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2877   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2878   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2879   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2880   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2881 
2882   if (!B->preallocated) {
2883     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2884     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2885     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2886     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2887     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2888   }
2889 
2890   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2891   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2892   B->preallocated  = PETSC_TRUE;
2893   B->was_assembled = PETSC_FALSE;
2894   B->assembled     = PETSC_FALSE;
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2899 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2900 
/*
   MatConvert_MPIBAIJ_MPIAdj - Converts an MPIBAIJ matrix to the MPIAdj
   (adjacency) format at BLOCK granularity: each bs x bs block becomes one
   graph edge, and diagonal entries are omitted from the adjacency lists.

   NOTE(review): the newtype and reuse arguments are accepted for the MatConvert
   interface but are not consulted here; a fresh MPIAdj matrix is always created.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* pass 1: per-block-row counts combining diagonal and off-diagonal parts */
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* subtract one from the count when the row contains a diagonal entry,
       since it is skipped when copying the indices below */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* pass 2: merge off-diagonal (left of the owned range), diagonal-part, and
     off-diagonal (right of the owned range) columns in ascending global order */
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  /* MatCreateMPIAdj takes ownership of ii and jj, so they are not freed here */
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2940 
2941 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2942 
2943 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2944 
/*
   MatConvert_MPIBAIJ_MPIAIJ - Converts an assembled MPIBAIJ matrix to MPIAIJ by
   converting its sequential diagonal and off-diagonal blocks with
   MatConvert_SeqBAIJ_SeqAIJ() and splicing them into a parallel AIJ shell.

   NOTE(review): for MAT_INITIAL_MATRIX/MAT_INPLACE_MATRIX the source A is
   temporarily disassembled (MatDisAssemble_MPIBAIJ) and then reassembled, so A
   is modified during the conversion even in the non-inplace case.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create the target AIJ shell with empty preallocation; the real storage
       comes from the per-block conversions below */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    /* expand A's off-diagonal block to full column width before converting */
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    /* restore A to its assembled (compressed) state */
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2989 
2990 /*MC
2991    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2992 
2993    Options Database Keys:
2994 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2995 . -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table during assembly, sized by the factor <fact>
2997 
2998   Level: beginner
2999 
3000 .seealso: MatCreateMPIBAIJ
3001 M*/
3002 
3003 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3004 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
3005 
/*
   MatCreate_MPIBAIJ - Type constructor for MATMPIBAIJ: allocates the
   type-specific data structure, installs the operations table, initializes all
   bookkeeping fields, and registers the composed methods (conversions,
   preallocation, hash-table options) under this object's query list.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;


  /* register the type-specific implementations that generic front ends
     (MatConvert(), MatMPIBAIJSetPreallocation(), ...) look up by name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  /* optionally enable hash-table assembly; a factor <= 1.0 falls back to the default 1.39 */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3088 
3089 /*MC
3090    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3091 
3092    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3093    and MATMPIBAIJ otherwise.
3094 
3095    Options Database Keys:
3096 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3097 
3098   Level: beginner
3099 
3100 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3101 M*/
3102 
3103 /*@C
3104    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3105    (block compressed row).  For good matrix assembly performance
3106    the user should preallocate the matrix storage by setting the parameters
3107    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3108    performance can be increased by more than a factor of 50.
3109 
3110    Collective on Mat
3111 
3112    Input Parameters:
3113 +  B - the matrix
3114 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3115           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3116 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3117            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
3122 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3123            submatrix (same for all local rows).
3124 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3125            off-diagonal portion of the local submatrix (possibly different for
3126            each block row) or NULL.
3127 
3128    If the *_nnz parameter is given then the *_nz parameter is ignored
3129 
3130    Options Database Keys:
3131 +   -mat_block_size - size of the blocks to use
3132 -   -mat_use_hash_table <fact>
3133 
3134    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3137 
3138    Storage Information:
3139    For a square global matrix we define each processor's diagonal portion
3140    to be its local rows and the corresponding columns (a square submatrix);
3141    each processor's off-diagonal portion encompasses the remainder of the
3142    local matrix (a rectangular submatrix).
3143 
3144    The user can specify preallocated storage for the diagonal part of
3145    the local submatrix with either d_nz or d_nnz (not both).  Set
3146    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3147    memory allocation.  Likewise, specify preallocated storage for the
3148    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3149 
3150    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3151    the figure below we depict these three local rows and all columns (0-11).
3152 
3153 .vb
3154            0 1 2 3 4 5 6 7 8 9 10 11
3155           --------------------------
3156    row 3  |o o o d d d o o o o  o  o
3157    row 4  |o o o d d d o o o o  o  o
3158    row 5  |o o o d d d o o o o  o  o
3159           --------------------------
3160 .ve
3161 
3162    Thus, any entries in the d locations are stored in the d (diagonal)
3163    submatrix, and any entries in the o locations are stored in the
3164    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3165    stored simply in the MATSEQBAIJ format for compressed row storage.
3166 
3167    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3168    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3169    In general, for PDE problems in which most nonzeros are near the diagonal,
3170    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3171    or you will get TERRIBLE performance; see the users' manual chapter on
3172    matrices.
3173 
3174    You can call MatGetInfo() to get information on how effective the preallocation was;
3175    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3176    You can also run with the option -info and look for messages with the string
3177    malloc in them to see if additional memory allocation was needed.
3178 
3179    Level: intermediate
3180 
3181 .keywords: matrix, block, aij, compressed row, sparse, parallel
3182 
3183 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3184 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation registered under this name;
     PetscTryMethod is a no-op if the matrix type does not provide one */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3196 
3197 /*@C
3198    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3199    (block compressed row).  For good matrix assembly performance
3200    the user should preallocate the matrix storage by setting the parameters
3201    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3202    performance can be increased by more than a factor of 50.
3203 
3204    Collective on MPI_Comm
3205 
3206    Input Parameters:
3207 +  comm - MPI communicator
3208 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3209           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3210 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3211            This value should be the same as the local size used in creating the
3212            y vector for the matrix-vector product y = Ax.
3213 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3214            This value should be the same as the local size used in creating the
3215            x vector for the matrix-vector product y = Ax.
3216 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3217 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3218 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3219            submatrix  (same for all local rows)
3220 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3221            of the in diagonal portion of the local (possibly different for each block
3222            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3223            and set it even if it is zero.
3224 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3225            submatrix (same for all local rows).
3226 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3227            off-diagonal portion of the local submatrix (possibly different for
3228            each block row) or NULL.
3229 
3230    Output Parameter:
3231 .  A - the matrix
3232 
3233    Options Database Keys:
3234 +   -mat_block_size - size of the blocks to use
3235 -   -mat_use_hash_table <fact>
3236 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3240 
3241    Notes:
3242    If the *_nnz parameter is given then the *_nz parameter is ignored
3243 
   A nonzero block is any block that has 1 or more nonzeros in it
3245 
3246    The user MUST specify either the local or global matrix dimensions
3247    (possibly both).
3248 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3251 
3252    Storage Information:
3253    For a square global matrix we define each processor's diagonal portion
3254    to be its local rows and the corresponding columns (a square submatrix);
3255    each processor's off-diagonal portion encompasses the remainder of the
3256    local matrix (a rectangular submatrix).
3257 
3258    The user can specify preallocated storage for the diagonal part of
3259    the local submatrix with either d_nz or d_nnz (not both).  Set
3260    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3261    memory allocation.  Likewise, specify preallocated storage for the
3262    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3263 
3264    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3265    the figure below we depict these three local rows and all columns (0-11).
3266 
3267 .vb
3268            0 1 2 3 4 5 6 7 8 9 10 11
3269           --------------------------
3270    row 3  |o o o d d d o o o o  o  o
3271    row 4  |o o o d d d o o o o  o  o
3272    row 5  |o o o d d d o o o o  o  o
3273           --------------------------
3274 .ve
3275 
3276    Thus, any entries in the d locations are stored in the d (diagonal)
3277    submatrix, and any entries in the o locations are stored in the
3278    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3279    stored simply in the MATSEQBAIJ format for compressed row storage.
3280 
3281    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3282    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3283    In general, for PDE problems in which most nonzeros are near the diagonal,
3284    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3285    or you will get TERRIBLE performance; see the users' manual chapter on
3286    matrices.
3287 
3288    Level: intermediate
3289 
3290 .keywords: matrix, block, aij, compressed row, sparse, parallel
3291 
3292 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3293 @*/
3294 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3295 {
3296   PetscErrorCode ierr;
3297   PetscMPIInt    size;
3298 
3299   PetscFunctionBegin;
3300   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3301   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3302   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3303   if (size > 1) {
3304     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3305     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3306   } else {
3307     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3308     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3309   }
3310   PetscFunctionReturn(0);
3311 }
3312 
/*
   MatDuplicate_MPIBAIJ - Duplicates an MPIBAIJ matrix: creates a new matrix
   with the same parallel layout, copies the scalar bookkeeping fields, deep
   copies the column map, garray and ownership ranges, recreates the
   scatter context and work vector, and duplicates the diagonal (A) and
   off-diagonal (B) sequential blocks (values copied according to cpvalues).
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* the duplicate is marked assembled/preallocated up front; the sub-matrix
     duplication below supplies the actual data */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share (reference-count) the row/column layouts rather than copying them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration but reset the table and counters */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  /* deep copy of the global-to-local column map, if present */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* deep copy of the off-diagonal global column indices (one per block column of B) */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3395 
/*
   MatLoad_MPIBAIJ - Loads a matrix from a PETSc binary viewer into an MPIBAIJ
   matrix. Rank 0 reads the file and ships row lengths, column indices and
   values to the other ranks with point-to-point messages. If the global size
   is not divisible by the block size, extra padding rows/columns (with a unit
   diagonal) are appended so it becomes divisible.

   Only square matrices stored in the standard (non-special) binary format are
   supported.
*/
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend;
  PetscBool      isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);

  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* rank 0 reads and validates the file header (classid, M, N, nz) */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
  if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);

  if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  /* rowners/browners hold the block-row and point-row ownership ranges */
  ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax*=bs;
  } else mmax = -1;             /* unused, but compiler warns anyway */

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    /* rank 0 keeps its own row lengths, pads the extra rows with length 1,
       and ships each other rank its slice of the row-length array */
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,NULL,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,NULL,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      /* padding rows get a unit diagonal in the padded columns M..M+extra_rows-1 */
      for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  /* mask/masked1/masked2 track which block columns were already counted
     for the current block row, so duplicates within a block are not double counted */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values  */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      /* unit diagonal for the padding rows */
      for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3649 
3650 /*@
3651    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3652 
3653    Input Parameters:
+  mat  - the matrix
-  fact - factor
3656 
3657    Not Collective, each process can use a different factor
3658 
3659    Level: advanced
3660 
3661   Notes:
3662    This can also be set by the command line option: -mat_use_hash_table <fact>
3663 
3664 .keywords: matrix, hashtable, factor, HT
3665 
3666 .seealso: MatSetOption()
3667 @*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (MatSetHashTableFactor_MPIBAIJ below)
     if one is registered on this matrix type; silently a no-op otherwise, which is why
     PetscTryMethod (not PetscUseMethod) is used here. */
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3676 
3677 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3678 {
3679   Mat_MPIBAIJ *baij;
3680 
3681   PetscFunctionBegin;
3682   baij          = (Mat_MPIBAIJ*)mat->data;
3683   baij->ht_fact = fact;
3684   PetscFunctionReturn(0);
3685 }
3686 
3687 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3688 {
3689   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3690   PetscBool      flg;
3691   PetscErrorCode ierr;
3692 
3693   PetscFunctionBegin;
3694   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3695   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3696   if (Ad)     *Ad     = a->A;
3697   if (Ao)     *Ao     = a->B;
3698   if (colmap) *colmap = a->garray;
3699   PetscFunctionReturn(0);
3700 }
3701 
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
3705 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3706 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3707 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3708 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3709 #endif
3710 
3711 /*@C
3712   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3713 
3714   Collective on Mat
3715 
3716   Input Parameters:
3717 + mat - the matrix
3718 . min - number of input rows
3719 . im - input rows
3720 . nin - number of input columns
3721 . in - input columns
3722 . v - numerical values input
3723 - addvin - INSERT_VALUES or ADD_VALUES
3724 
3725   Notes:
3726     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3727 
3728   Level: advanced
3729 
3730 .seealso:   MatSetValuesBlocked()
3731 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;      /* scratch buffer holding one bs x bs block */
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  /* lazily allocate the one-block scratch buffer, cached on the matrix for reuse */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) from the end of one block row/column of v to the start of the next */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block-row index means "skip this row" */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned locally */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required: the block is already contiguous in v */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required: gather the (i,j) block from v into barray */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column lies in the diagonal portion */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue; /* negative block-column index means "skip" */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* off-diagonal portion: map the global block column to B's local numbering */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not present in current nonzero pattern: disassemble and insert globally */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3849 
3850 /*@
3851      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3852          CSR format the local rows.
3853 
3854    Collective on MPI_Comm
3855 
3856    Input Parameters:
3857 +  comm - MPI communicator
3858 .  bs - the block size, only a block size of 1 is supported
3859 .  m - number of local rows (Cannot be PETSC_DECIDE)
3860 .  n - This value should be the same as the local size used in creating the
3861        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3862        calculated if N is given) For square matrices n is almost always m.
3863 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3864 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3865 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3866 .   j - column indices
3867 -   a - matrix values
3868 
3869    Output Parameter:
3870 .   mat - the matrix
3871 
3872    Level: intermediate
3873 
3874    Notes:
3875        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3876      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3877      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3878 
3879      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3880      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3881      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3882      with column-major ordering within blocks.
3883 
3884        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3885 
3886 .keywords: matrix, aij, compressed row, sparse, parallel
3887 
3888 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3889           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3890 @*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Validate the block-CSR input: row offsets must be zero-based, and the local
     row count must be explicit (the CSR arrays are sized by it). */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  /* The input blocks are column-major (see the Notes above), so switch to
     column-oriented insertion for the duration of the CSR preallocation/fill ... */
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  /* ... then restore the default row orientation for any later MatSetValues() calls. */
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3908 
3909 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3910 {
3911   PetscErrorCode ierr;
3912   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3913   PetscInt       *indx;
3914   PetscScalar    *values;
3915 
3916   PetscFunctionBegin;
3917   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3918   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3919     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3920     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3921     PetscInt       *bindx,rmax=a->rmax,j;
3922     PetscMPIInt    rank,size;
3923 
3924     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3925     mbs = m/bs; Nbs = N/cbs;
3926     if (n == PETSC_DECIDE) {
3927       nbs  = n;
3928       ierr = PetscSplitOwnership(comm,&nbs,&Nbs);CHKERRQ(ierr);
3929       n    = nbs*cbs;
3930     } else {
3931       nbs = n/cbs;
3932     }
3933 
3934     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3935     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3936 
3937     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3938     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3939     if (rank == size-1) {
3940       /* Check sum(nbs) = Nbs */
3941       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3942     }
3943 
3944     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3945     for (i=0; i<mbs; i++) {
3946       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3947       nnz = nnz/bs;
3948       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3949       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3950       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3951     }
3952     ierr = PetscFree(bindx);CHKERRQ(ierr);
3953 
3954     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3955     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3956     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3957     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3958     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3959     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3960     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3961   }
3962 
3963   /* numeric phase */
3964   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3965   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3966 
3967   for (i=0; i<m; i++) {
3968     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3969     Ii   = i + rstart;
3970     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3971     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3972   }
3973   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3974   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975   PetscFunctionReturn(0);
3976 }
3977