xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 65f8aed5f7eaa1e2ef2ddeffe666264e0669c876)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode ierr;
15   PetscInt       i,*idxb = 0;
16   PetscScalar    *va,*vb;
17   Vec            vtmp;
18 
19   PetscFunctionBegin;
20   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
21   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
22   if (idx) {
23     for (i=0; i<A->rmap->n; i++) {
24       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
25     }
26   }
27 
28   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
29   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
30   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
31   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
32 
33   for (i=0; i<A->rmap->n; i++) {
34     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
35       va[i] = vb[i];
36       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
37     }
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non scalable way since the
73    length of colmap equals the global matrix length.
74 */
75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
76 {
77   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
78   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
79   PetscErrorCode ierr;
80   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
81 
82   PetscFunctionBegin;
83 #if defined(PETSC_USE_CTABLE)
84   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
85   for (i=0; i<nbs; i++) {
86     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
87   }
88 #else
89   ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
90   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
91   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
92   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
93 #endif
94   PetscFunctionReturn(0);
95 }
96 
97 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
98   { \
99  \
100     brow = row/bs;  \
101     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
102     rmax = aimax[brow]; nrow = ailen[brow]; \
103     bcol = col/bs; \
104     ridx = row % bs; cidx = col % bs; \
105     low  = 0; high = nrow; \
106     while (high-low > 3) { \
107       t = (low+high)/2; \
108       if (rp[t] > bcol) high = t; \
109       else              low  = t; \
110     } \
111     for (_i=low; _i<high; _i++) { \
112       if (rp[_i] > bcol) break; \
113       if (rp[_i] == bcol) { \
114         bap = ap +  bs2*_i + bs*cidx + ridx; \
115         if (addv == ADD_VALUES) *bap += value;  \
116         else                    *bap  = value;  \
117         goto a_noinsert; \
118       } \
119     } \
120     if (a->nonew == 1) goto a_noinsert; \
121     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
122     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
123     N = nrow++ - 1;  \
124     /* shift up all the later entries in this row */ \
125     for (ii=N; ii>=_i; ii--) { \
126       rp[ii+1] = rp[ii]; \
127       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
128     } \
129     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
130     rp[_i]                      = bcol;  \
131     ap[bs2*_i + bs*cidx + ridx] = value;  \
132 a_noinsert:; \
133     ailen[brow] = nrow; \
134   }
135 
136 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
137   { \
138     brow = row/bs;  \
139     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
140     rmax = bimax[brow]; nrow = bilen[brow]; \
141     bcol = col/bs; \
142     ridx = row % bs; cidx = col % bs; \
143     low  = 0; high = nrow; \
144     while (high-low > 3) { \
145       t = (low+high)/2; \
146       if (rp[t] > bcol) high = t; \
147       else              low  = t; \
148     } \
149     for (_i=low; _i<high; _i++) { \
150       if (rp[_i] > bcol) break; \
151       if (rp[_i] == bcol) { \
152         bap = ap +  bs2*_i + bs*cidx + ridx; \
153         if (addv == ADD_VALUES) *bap += value;  \
154         else                    *bap  = value;  \
155         goto b_noinsert; \
156       } \
157     } \
158     if (b->nonew == 1) goto b_noinsert; \
159     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
160     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
161     N = nrow++ - 1;  \
162     /* shift up all the later entries in this row */ \
163     for (ii=N; ii>=_i; ii--) { \
164       rp[ii+1] = rp[ii]; \
165       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
166     } \
167     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
168     rp[_i]                      = bcol;  \
169     ap[bs2*_i + bs*cidx + ridx] = value;  \
170 b_noinsert:; \
171     bilen[brow] = nrow; \
172   }
173 
174 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
175 {
176   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
177   MatScalar      value;
178   PetscBool      roworiented = baij->roworiented;
179   PetscErrorCode ierr;
180   PetscInt       i,j,row,col;
181   PetscInt       rstart_orig=mat->rmap->rstart;
182   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
183   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
184 
185   /* Some Variables required in the macro */
186   Mat         A     = baij->A;
187   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
188   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
189   MatScalar   *aa   =a->a;
190 
191   Mat         B     = baij->B;
192   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
193   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
194   MatScalar   *ba   =b->a;
195 
196   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
197   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
198   MatScalar *ap,*bap;
199 
200   PetscFunctionBegin;
201   for (i=0; i<m; i++) {
202     if (im[i] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
205 #endif
206     if (im[i] >= rstart_orig && im[i] < rend_orig) {
207       row = im[i] - rstart_orig;
208       for (j=0; j<n; j++) {
209         if (in[j] >= cstart_orig && in[j] < cend_orig) {
210           col = in[j] - cstart_orig;
211           if (roworiented) value = v[i*n+j];
212           else             value = v[i+j*m];
213           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
214           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
215         } else if (in[j] < 0) continue;
216 #if defined(PETSC_USE_DEBUG)
217         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
218 #endif
219         else {
220           if (mat->was_assembled) {
221             if (!baij->colmap) {
222               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
223             }
224 #if defined(PETSC_USE_CTABLE)
225             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
226             col  = col - 1;
227 #else
228             col = baij->colmap[in[j]/bs] - 1;
229 #endif
230             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
231               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
232               col  =  in[j];
233               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
234               B    = baij->B;
235               b    = (Mat_SeqBAIJ*)(B)->data;
236               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
237               ba   =b->a;
238             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
239             else col += in[j]%bs;
240           } else col = in[j];
241           if (roworiented) value = v[i*n+j];
242           else             value = v[i+j*m];
243           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
244           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
245         }
246       }
247     } else {
248       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
249       if (!baij->donotstash) {
250         mat->assembled = PETSC_FALSE;
251         if (roworiented) {
252           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
253         } else {
254           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
255         }
256       }
257     }
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
263 {
264   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
265   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
266   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
267   PetscErrorCode    ierr;
268   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
269   PetscBool         roworiented=a->roworiented;
270   const PetscScalar *value     = v;
271   MatScalar         *ap,*aa = a->a,*bap;
272 
273   PetscFunctionBegin;
274   rp   = aj + ai[row];
275   ap   = aa + bs2*ai[row];
276   rmax = imax[row];
277   nrow = ailen[row];
278   value = v;
279   low = 0;
280   high = nrow;
281   while (high-low > 7) {
282     t = (low+high)/2;
283     if (rp[t] > col) high = t;
284     else             low  = t;
285   }
286   for (i=low; i<high; i++) {
287     if (rp[i] > col) break;
288     if (rp[i] == col) {
289       bap = ap +  bs2*i;
290       if (roworiented) {
291         if (is == ADD_VALUES) {
292           for (ii=0; ii<bs; ii++) {
293             for (jj=ii; jj<bs2; jj+=bs) {
294               bap[jj] += *value++;
295             }
296           }
297         } else {
298           for (ii=0; ii<bs; ii++) {
299             for (jj=ii; jj<bs2; jj+=bs) {
300               bap[jj] = *value++;
301             }
302           }
303         }
304       } else {
305         if (is == ADD_VALUES) {
306           for (ii=0; ii<bs; ii++,value+=bs) {
307             for (jj=0; jj<bs; jj++) {
308               bap[jj] += value[jj];
309             }
310             bap += bs;
311           }
312         } else {
313           for (ii=0; ii<bs; ii++,value+=bs) {
314             for (jj=0; jj<bs; jj++) {
315               bap[jj]  = value[jj];
316             }
317             bap += bs;
318           }
319         }
320       }
321       goto noinsert2;
322     }
323   }
324   if (nonew == 1) goto noinsert2;
325   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
326   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
327   N = nrow++ - 1; high++;
328   /* shift up all the later entries in this row */
329   for (ii=N; ii>=i; ii--) {
330     rp[ii+1] = rp[ii];
331     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
332   }
333   if (N >= i) {
334     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
335   }
336   rp[i] = col;
337   bap   = ap +  bs2*i;
338   if (roworiented) {
339     for (ii=0; ii<bs; ii++) {
340       for (jj=ii; jj<bs2; jj+=bs) {
341         bap[jj] = *value++;
342       }
343     }
344   } else {
345     for (ii=0; ii<bs; ii++) {
346       for (jj=0; jj<bs; jj++) {
347         *bap++ = *value++;
348       }
349     }
350   }
351   noinsert2:;
352   ailen[row] = nrow;
353   PetscFunctionReturn(0);
354 }
355 
356 /*
357     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
358     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
359 */
360 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
361 {
362   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
363   const PetscScalar *value;
364   MatScalar         *barray     = baij->barray;
365   PetscBool         roworiented = baij->roworiented;
366   PetscErrorCode    ierr;
367   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
368   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
369   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
370 
371   PetscFunctionBegin;
372   if (!barray) {
373     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
374     baij->barray = barray;
375   }
376 
377   if (roworiented) stepval = (n-1)*bs;
378   else stepval = (m-1)*bs;
379 
380   for (i=0; i<m; i++) {
381     if (im[i] < 0) continue;
382 #if defined(PETSC_USE_DEBUG)
383     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
384 #endif
385     if (im[i] >= rstart && im[i] < rend) {
386       row = im[i] - rstart;
387       for (j=0; j<n; j++) {
388         /* If NumCol = 1 then a copy is not required */
389         if ((roworiented) && (n == 1)) {
390           barray = (MatScalar*)v + i*bs2;
391         } else if ((!roworiented) && (m == 1)) {
392           barray = (MatScalar*)v + j*bs2;
393         } else { /* Here a copy is required */
394           if (roworiented) {
395             value = v + (i*(stepval+bs) + j)*bs;
396           } else {
397             value = v + (j*(stepval+bs) + i)*bs;
398           }
399           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
400             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
401             barray += bs;
402           }
403           barray -= bs2;
404         }
405 
406         if (in[j] >= cstart && in[j] < cend) {
407           col  = in[j] - cstart;
408           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
409         } else if (in[j] < 0) continue;
410 #if defined(PETSC_USE_DEBUG)
411         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
412 #endif
413         else {
414           if (mat->was_assembled) {
415             if (!baij->colmap) {
416               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
417             }
418 
419 #if defined(PETSC_USE_DEBUG)
420 #if defined(PETSC_USE_CTABLE)
421             { PetscInt data;
422               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
423               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
424             }
425 #else
426             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
427 #endif
428 #endif
429 #if defined(PETSC_USE_CTABLE)
430             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
431             col  = (col - 1)/bs;
432 #else
433             col = (baij->colmap[in[j]] - 1)/bs;
434 #endif
435             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
436               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
437               col  =  in[j];
438             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
439           } else col = in[j];
440           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
441         }
442       }
443     } else {
444       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454   PetscFunctionReturn(0);
455 }
456 
457 #define HASH_KEY 0.6180339887
458 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
459 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
460 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
461 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
462 {
463   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
464   PetscBool      roworiented = baij->roworiented;
465   PetscErrorCode ierr;
466   PetscInt       i,j,row,col;
467   PetscInt       rstart_orig=mat->rmap->rstart;
468   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
469   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
470   PetscReal      tmp;
471   MatScalar      **HD = baij->hd,value;
472 #if defined(PETSC_USE_DEBUG)
473   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
474 #endif
475 
476   PetscFunctionBegin;
477   for (i=0; i<m; i++) {
478 #if defined(PETSC_USE_DEBUG)
479     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
480     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
481 #endif
482     row = im[i];
483     if (row >= rstart_orig && row < rend_orig) {
484       for (j=0; j<n; j++) {
485         col = in[j];
486         if (roworiented) value = v[i*n+j];
487         else             value = v[i+j*m];
488         /* Look up PetscInto the Hash Table */
489         key = (row/bs)*Nbs+(col/bs)+1;
490         h1  = HASH(size,key,tmp);
491 
492 
493         idx = h1;
494 #if defined(PETSC_USE_DEBUG)
495         insert_ct++;
496         total_ct++;
497         if (HT[idx] != key) {
498           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
499           if (idx == size) {
500             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
501             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
502           }
503         }
504 #else
505         if (HT[idx] != key) {
506           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
507           if (idx == size) {
508             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
509             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
510           }
511         }
512 #endif
513         /* A HASH table entry is found, so insert the values at the correct address */
514         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
515         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
516       }
517     } else if (!baij->donotstash) {
518       if (roworiented) {
519         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
520       } else {
521         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
522       }
523     }
524   }
525 #if defined(PETSC_USE_DEBUG)
526   baij->ht_total_ct  += total_ct;
527   baij->ht_insert_ct += insert_ct;
528 #endif
529   PetscFunctionReturn(0);
530 }
531 
532 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
533 {
534   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
535   PetscBool         roworiented = baij->roworiented;
536   PetscErrorCode    ierr;
537   PetscInt          i,j,ii,jj,row,col;
538   PetscInt          rstart=baij->rstartbs;
539   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
540   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
541   PetscReal         tmp;
542   MatScalar         **HD = baij->hd,*baij_a;
543   const PetscScalar *v_t,*value;
544 #if defined(PETSC_USE_DEBUG)
545   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
546 #endif
547 
548   PetscFunctionBegin;
549   if (roworiented) stepval = (n-1)*bs;
550   else stepval = (m-1)*bs;
551 
552   for (i=0; i<m; i++) {
553 #if defined(PETSC_USE_DEBUG)
554     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
555     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
556 #endif
557     row = im[i];
558     v_t = v + i*nbs2;
559     if (row >= rstart && row < rend) {
560       for (j=0; j<n; j++) {
561         col = in[j];
562 
563         /* Look up into the Hash Table */
564         key = row*Nbs+col+1;
565         h1  = HASH(size,key,tmp);
566 
567         idx = h1;
568 #if defined(PETSC_USE_DEBUG)
569         total_ct++;
570         insert_ct++;
571         if (HT[idx] != key) {
572           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
573           if (idx == size) {
574             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
575             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
576           }
577         }
578 #else
579         if (HT[idx] != key) {
580           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
581           if (idx == size) {
582             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
583             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
584           }
585         }
586 #endif
587         baij_a = HD[idx];
588         if (roworiented) {
589           /*value = v + i*(stepval+bs)*bs + j*bs;*/
590           /* value = v + (i*(stepval+bs)+j)*bs; */
591           value = v_t;
592           v_t  += bs;
593           if (addv == ADD_VALUES) {
594             for (ii=0; ii<bs; ii++,value+=stepval) {
595               for (jj=ii; jj<bs2; jj+=bs) {
596                 baij_a[jj] += *value++;
597               }
598             }
599           } else {
600             for (ii=0; ii<bs; ii++,value+=stepval) {
601               for (jj=ii; jj<bs2; jj+=bs) {
602                 baij_a[jj] = *value++;
603               }
604             }
605           }
606         } else {
607           value = v + j*(stepval+bs)*bs + i*bs;
608           if (addv == ADD_VALUES) {
609             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
610               for (jj=0; jj<bs; jj++) {
611                 baij_a[jj] += *value++;
612               }
613             }
614           } else {
615             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
616               for (jj=0; jj<bs; jj++) {
617                 baij_a[jj] = *value++;
618               }
619             }
620           }
621         }
622       }
623     } else {
624       if (!baij->donotstash) {
625         if (roworiented) {
626           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
627         } else {
628           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
629         }
630       }
631     }
632   }
633 #if defined(PETSC_USE_DEBUG)
634   baij->ht_total_ct  += total_ct;
635   baij->ht_insert_ct += insert_ct;
636 #endif
637   PetscFunctionReturn(0);
638 }
639 
640 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
641 {
642   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
643   PetscErrorCode ierr;
644   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
645   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
646 
647   PetscFunctionBegin;
648   for (i=0; i<m; i++) {
649     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
650     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
651     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
652       row = idxm[i] - bsrstart;
653       for (j=0; j<n; j++) {
654         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
655         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
656         if (idxn[j] >= bscstart && idxn[j] < bscend) {
657           col  = idxn[j] - bscstart;
658           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
659         } else {
660           if (!baij->colmap) {
661             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
662           }
663 #if defined(PETSC_USE_CTABLE)
664           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
665           data--;
666 #else
667           data = baij->colmap[idxn[j]/bs]-1;
668 #endif
669           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
670           else {
671             col  = data + idxn[j]%bs;
672             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
673           }
674         }
675       }
676     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
677   }
678   PetscFunctionReturn(0);
679 }
680 
681 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
682 {
683   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
684   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
685   PetscErrorCode ierr;
686   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
687   PetscReal      sum = 0.0;
688   MatScalar      *v;
689 
690   PetscFunctionBegin;
691   if (baij->size == 1) {
692     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
693   } else {
694     if (type == NORM_FROBENIUS) {
695       v  = amat->a;
696       nz = amat->nz*bs2;
697       for (i=0; i<nz; i++) {
698         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
699       }
700       v  = bmat->a;
701       nz = bmat->nz*bs2;
702       for (i=0; i<nz; i++) {
703         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
704       }
705       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
706       *nrm = PetscSqrtReal(*nrm);
707     } else if (type == NORM_1) { /* max column sum */
708       PetscReal *tmp,*tmp2;
709       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
710       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
711       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
712       v    = amat->a; jj = amat->j;
713       for (i=0; i<amat->nz; i++) {
714         for (j=0; j<bs; j++) {
715           col = bs*(cstart + *jj) + j; /* column index */
716           for (row=0; row<bs; row++) {
717             tmp[col] += PetscAbsScalar(*v);  v++;
718           }
719         }
720         jj++;
721       }
722       v = bmat->a; jj = bmat->j;
723       for (i=0; i<bmat->nz; i++) {
724         for (j=0; j<bs; j++) {
725           col = bs*garray[*jj] + j;
726           for (row=0; row<bs; row++) {
727             tmp[col] += PetscAbsScalar(*v); v++;
728           }
729         }
730         jj++;
731       }
732       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
733       *nrm = 0.0;
734       for (j=0; j<mat->cmap->N; j++) {
735         if (tmp2[j] > *nrm) *nrm = tmp2[j];
736       }
737       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
738     } else if (type == NORM_INFINITY) { /* max row sum */
739       PetscReal *sums;
740       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
741       sum  = 0.0;
742       for (j=0; j<amat->mbs; j++) {
743         for (row=0; row<bs; row++) sums[row] = 0.0;
744         v  = amat->a + bs2*amat->i[j];
745         nz = amat->i[j+1]-amat->i[j];
746         for (i=0; i<nz; i++) {
747           for (col=0; col<bs; col++) {
748             for (row=0; row<bs; row++) {
749               sums[row] += PetscAbsScalar(*v); v++;
750             }
751           }
752         }
753         v  = bmat->a + bs2*bmat->i[j];
754         nz = bmat->i[j+1]-bmat->i[j];
755         for (i=0; i<nz; i++) {
756           for (col=0; col<bs; col++) {
757             for (row=0; row<bs; row++) {
758               sums[row] += PetscAbsScalar(*v); v++;
759             }
760           }
761         }
762         for (row=0; row<bs; row++) {
763           if (sums[row] > sum) sum = sums[row];
764         }
765       }
766       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
767       ierr = PetscFree(sums);CHKERRQ(ierr);
768     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
769   }
770   PetscFunctionReturn(0);
771 }
772 
773 /*
774   Creates the hash table, and sets the table
775   This table is created only once.
776   If new entried need to be added to the matrix
777   then the hash table has to be destroyed and
778   recreated.
779 */
780 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
781 {
782   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
783   Mat            A     = baij->A,B=baij->B;
784   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
785   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
786   PetscErrorCode ierr;
787   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
788   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
789   PetscInt       *HT,key;
790   MatScalar      **HD;
791   PetscReal      tmp;
792 #if defined(PETSC_USE_INFO)
793   PetscInt ct=0,max=0;
794 #endif
795 
796   PetscFunctionBegin;
797   if (baij->ht) PetscFunctionReturn(0);
798 
799   baij->ht_size = (PetscInt)(factor*nz);
800   ht_size       = baij->ht_size;
801 
802   /* Allocate Memory for Hash Table */
803   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
804   HD   = baij->hd;
805   HT   = baij->ht;
806 
807   /* Loop Over A */
808   for (i=0; i<a->mbs; i++) {
809     for (j=ai[i]; j<ai[i+1]; j++) {
810       row = i+rstart;
811       col = aj[j]+cstart;
812 
813       key = row*Nbs + col + 1;
814       h1  = HASH(ht_size,key,tmp);
815       for (k=0; k<ht_size; k++) {
816         if (!HT[(h1+k)%ht_size]) {
817           HT[(h1+k)%ht_size] = key;
818           HD[(h1+k)%ht_size] = a->a + j*bs2;
819           break;
820 #if defined(PETSC_USE_INFO)
821         } else {
822           ct++;
823 #endif
824         }
825       }
826 #if defined(PETSC_USE_INFO)
827       if (k> max) max = k;
828 #endif
829     }
830   }
831   /* Loop Over B */
832   for (i=0; i<b->mbs; i++) {
833     for (j=bi[i]; j<bi[i+1]; j++) {
834       row = i+rstart;
835       col = garray[bj[j]];
836       key = row*Nbs + col + 1;
837       h1  = HASH(ht_size,key,tmp);
838       for (k=0; k<ht_size; k++) {
839         if (!HT[(h1+k)%ht_size]) {
840           HT[(h1+k)%ht_size] = key;
841           HD[(h1+k)%ht_size] = b->a + j*bs2;
842           break;
843 #if defined(PETSC_USE_INFO)
844         } else {
845           ct++;
846 #endif
847         }
848       }
849 #if defined(PETSC_USE_INFO)
850       if (k> max) max = k;
851 #endif
852     }
853   }
854 
855   /* Print Summary */
856 #if defined(PETSC_USE_INFO)
857   for (i=0,j=0; i<ht_size; i++) {
858     if (HT[i]) j++;
859   }
860   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
861 #endif
862   PetscFunctionReturn(0);
863 }
864 
865 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
866 {
867   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
868   PetscErrorCode ierr;
869   PetscInt       nstash,reallocs;
870 
871   PetscFunctionBegin;
872   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
873 
874   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
875   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
876   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
877   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
878   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
879   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
884 {
885   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
886   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
887   PetscErrorCode ierr;
888   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
889   PetscInt       *row,*col;
890   PetscBool      r1,r2,r3,other_disassembled;
891   MatScalar      *val;
892   PetscMPIInt    n;
893 
894   PetscFunctionBegin;
895   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
896   if (!baij->donotstash && !mat->nooffprocentries) {
897     while (1) {
898       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
899       if (!flg) break;
900 
901       for (i=0; i<n;) {
902         /* Now identify the consecutive vals belonging to the same row */
903         for (j=i,rstart=row[j]; j<n; j++) {
904           if (row[j] != rstart) break;
905         }
906         if (j < n) ncols = j-i;
907         else       ncols = n-i;
908         /* Now assemble all these values with a single function call */
909         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
910         i    = j;
911       }
912     }
913     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
914     /* Now process the block-stash. Since the values are stashed column-oriented,
915        set the roworiented flag to column oriented, and after MatSetValues()
916        restore the original flags */
917     r1 = baij->roworiented;
918     r2 = a->roworiented;
919     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
920 
921     baij->roworiented = PETSC_FALSE;
922     a->roworiented    = PETSC_FALSE;
923 
924     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
925     while (1) {
926       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
927       if (!flg) break;
928 
929       for (i=0; i<n;) {
930         /* Now identify the consecutive vals belonging to the same row */
931         for (j=i,rstart=row[j]; j<n; j++) {
932           if (row[j] != rstart) break;
933         }
934         if (j < n) ncols = j-i;
935         else       ncols = n-i;
936         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
937         i    = j;
938       }
939     }
940     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
941 
942     baij->roworiented = r1;
943     a->roworiented    = r2;
944 
945     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
946   }
947 
948   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
949   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
950 
951   /* determine if any processor has disassembled, if so we must
952      also disassemble ourselfs, in order that we may reassemble. */
953   /*
954      if nonzero structure of submatrix B cannot change then we know that
955      no processor disassembled thus we can skip this stuff
956   */
957   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
958     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
959     if (mat->was_assembled && !other_disassembled) {
960       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
961     }
962   }
963 
964   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
965     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
966   }
967   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
968   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
969 
970 #if defined(PETSC_USE_INFO)
971   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
972     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
973 
974     baij->ht_total_ct  = 0;
975     baij->ht_insert_ct = 0;
976   }
977 #endif
978   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
979     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
980 
981     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
982     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
983   }
984 
985   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
986 
987   baij->rowvalues = 0;
988 
989   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
990   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
991     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
992     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
993   }
994   PetscFunctionReturn(0);
995 }
996 
997 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
998 #include <petscdraw.h>
999 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1000 {
1001   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1002   PetscErrorCode    ierr;
1003   PetscMPIInt       rank = baij->rank;
1004   PetscInt          bs   = mat->rmap->bs;
1005   PetscBool         iascii,isdraw;
1006   PetscViewer       sviewer;
1007   PetscViewerFormat format;
1008 
1009   PetscFunctionBegin;
1010   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1011   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1012   if (iascii) {
1013     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1014     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1015       MatInfo info;
1016       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1017       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1018       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1019       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
1020                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
1021       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1022       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1023       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1024       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1025       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1026       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1027       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1028       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1029       PetscFunctionReturn(0);
1030     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1031       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1032       PetscFunctionReturn(0);
1033     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1034       PetscFunctionReturn(0);
1035     }
1036   }
1037 
1038   if (isdraw) {
1039     PetscDraw draw;
1040     PetscBool isnull;
1041     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1042     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1043     if (isnull) PetscFunctionReturn(0);
1044   }
1045 
1046   {
1047     /* assemble the entire matrix onto first processor. */
1048     Mat         A;
1049     Mat_SeqBAIJ *Aloc;
1050     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1051     MatScalar   *a;
1052     const char  *matname;
1053 
1054     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1055     /* Perhaps this should be the type of mat? */
1056     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1057     if (!rank) {
1058       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1059     } else {
1060       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1061     }
1062     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1063     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1064     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1065     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1066 
1067     /* copy over the A part */
1068     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1069     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1070     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1071 
1072     for (i=0; i<mbs; i++) {
1073       rvals[0] = bs*(baij->rstartbs + i);
1074       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1075       for (j=ai[i]; j<ai[i+1]; j++) {
1076         col = (baij->cstartbs+aj[j])*bs;
1077         for (k=0; k<bs; k++) {
1078           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1079           col++; a += bs;
1080         }
1081       }
1082     }
1083     /* copy over the B part */
1084     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1085     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1086     for (i=0; i<mbs; i++) {
1087       rvals[0] = bs*(baij->rstartbs + i);
1088       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1089       for (j=ai[i]; j<ai[i+1]; j++) {
1090         col = baij->garray[aj[j]]*bs;
1091         for (k=0; k<bs; k++) {
1092           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1093           col++; a += bs;
1094         }
1095       }
1096     }
1097     ierr = PetscFree(rvals);CHKERRQ(ierr);
1098     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1099     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1100     /*
1101        Everyone has to call to draw the matrix since the graphics waits are
1102        synchronized across all processors that share the PetscDraw object
1103     */
1104     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1105     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1106     if (!rank) {
1107       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1108       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1109     }
1110     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1111     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1112     ierr = MatDestroy(&A);CHKERRQ(ierr);
1113   }
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1118 {
1119   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1120   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1121   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1122   PetscErrorCode ierr;
1123   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1124   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1125   int            fd;
1126   PetscScalar    *column_values;
1127   FILE           *file;
1128   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1129   PetscInt       message_count,flowcontrolcount;
1130 
1131   PetscFunctionBegin;
1132   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1133   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1134   nz   = bs2*(A->nz + B->nz);
1135   rlen = mat->rmap->n;
1136   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1137   if (!rank) {
1138     header[0] = MAT_FILE_CLASSID;
1139     header[1] = mat->rmap->N;
1140     header[2] = mat->cmap->N;
1141 
1142     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1143     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1144     /* get largest number of rows any processor has */
1145     range = mat->rmap->range;
1146     for (i=1; i<size; i++) {
1147       rlen = PetscMax(rlen,range[i+1] - range[i]);
1148     }
1149   } else {
1150     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1151   }
1152 
1153   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1154   /* compute lengths of each row  */
1155   for (i=0; i<a->mbs; i++) {
1156     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1157   }
1158   /* store the row lengths to the file */
1159   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1160   if (!rank) {
1161     MPI_Status status;
1162     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1163     rlen = (range[1] - range[0])/bs;
1164     for (i=0; i<rlen; i++) {
1165       for (j=0; j<bs; j++) {
1166         row_lens[i*bs+j] = bs*crow_lens[i];
1167       }
1168     }
1169     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1170     for (i=1; i<size; i++) {
1171       rlen = (range[i+1] - range[i])/bs;
1172       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1173       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1174       for (k=0; k<rlen; k++) {
1175         for (j=0; j<bs; j++) {
1176           row_lens[k*bs+j] = bs*crow_lens[k];
1177         }
1178       }
1179       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     }
1181     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1182     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1183   } else {
1184     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1185     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1186     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1187   }
1188   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1189 
1190   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1191      information needed to make it for each row from a block row. This does require more communication but still not more than
1192      the communication needed for the nonzero values  */
1193   nzmax = nz; /*  space a largest processor needs */
1194   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1195   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1196   cnt   = 0;
1197   for (i=0; i<a->mbs; i++) {
1198     pcnt = cnt;
1199     for (j=B->i[i]; j<B->i[i+1]; j++) {
1200       if ((col = garray[B->j[j]]) > cstart) break;
1201       for (l=0; l<bs; l++) {
1202         column_indices[cnt++] = bs*col+l;
1203       }
1204     }
1205     for (k=A->i[i]; k<A->i[i+1]; k++) {
1206       for (l=0; l<bs; l++) {
1207         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1208       }
1209     }
1210     for (; j<B->i[i+1]; j++) {
1211       for (l=0; l<bs; l++) {
1212         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1213       }
1214     }
1215     len = cnt - pcnt;
1216     for (k=1; k<bs; k++) {
1217       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1218       cnt += len;
1219     }
1220   }
1221   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1222 
1223   /* store the columns to the file */
1224   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1225   if (!rank) {
1226     MPI_Status status;
1227     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1228     for (i=1; i<size; i++) {
1229       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1230       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1231       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1232       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1233     }
1234     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1235   } else {
1236     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1237     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1238     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1240   }
1241   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1242 
1243   /* load up the numerical values */
1244   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1245   cnt  = 0;
1246   for (i=0; i<a->mbs; i++) {
1247     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1248     for (j=B->i[i]; j<B->i[i+1]; j++) {
1249       if (garray[B->j[j]] > cstart) break;
1250       for (l=0; l<bs; l++) {
1251         for (ll=0; ll<bs; ll++) {
1252           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1253         }
1254       }
1255       cnt += bs;
1256     }
1257     for (k=A->i[i]; k<A->i[i+1]; k++) {
1258       for (l=0; l<bs; l++) {
1259         for (ll=0; ll<bs; ll++) {
1260           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1261         }
1262       }
1263       cnt += bs;
1264     }
1265     for (; j<B->i[i+1]; j++) {
1266       for (l=0; l<bs; l++) {
1267         for (ll=0; ll<bs; ll++) {
1268           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1269         }
1270       }
1271       cnt += bs;
1272     }
1273     cnt += (bs-1)*rlen;
1274   }
1275   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1276 
1277   /* store the column values to the file */
1278   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1279   if (!rank) {
1280     MPI_Status status;
1281     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1282     for (i=1; i<size; i++) {
1283       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1284       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1285       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1286       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1287     }
1288     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1289   } else {
1290     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1291     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1292     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1293     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1294   }
1295   ierr = PetscFree(column_values);CHKERRQ(ierr);
1296 
1297   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1298   if (file) {
1299     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1305 {
1306   PetscErrorCode ierr;
1307   PetscBool      iascii,isdraw,issocket,isbinary;
1308 
1309   PetscFunctionBegin;
1310   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1311   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1312   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1313   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1314   if (iascii || isdraw || issocket) {
1315     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1316   } else if (isbinary) {
1317     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1323 {
1324   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328 #if defined(PETSC_USE_LOG)
1329   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1330 #endif
1331   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1332   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1333   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1334   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1335 #if defined(PETSC_USE_CTABLE)
1336   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1337 #else
1338   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1339 #endif
1340   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1341   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1342   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1343   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1344   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1345   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1346   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1347   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1348 
1349   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1350   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1351   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1352   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1353   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1354   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1355   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1356   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1357   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1358 #if defined(PETSC_HAVE_HYPRE)
1359   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
1360 #endif
1361   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
1362   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1367 {
1368   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1369   PetscErrorCode ierr;
1370   PetscInt       nt;
1371 
1372   PetscFunctionBegin;
1373   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1374   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1375   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1376   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1377   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1378   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1379   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1380   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1385 {
1386   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1387   PetscErrorCode ierr;
1388 
1389   PetscFunctionBegin;
1390   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1391   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1392   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1393   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401   PetscBool      merged;
1402 
1403   PetscFunctionBegin;
1404   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1405   /* do nondiagonal part */
1406   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1407   if (!merged) {
1408     /* send it on its way */
1409     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1410     /* do local part */
1411     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1412     /* receive remote parts: note this assumes the values are not actually */
1413     /* inserted in yy until the next line */
1414     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1415   } else {
1416     /* do local part */
1417     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1418     /* send it on its way */
1419     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1420     /* values actually were received in the Begin() but we need to call this nop */
1421     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1427 {
1428   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1429   PetscErrorCode ierr;
1430 
1431   PetscFunctionBegin;
1432   /* do nondiagonal part */
1433   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1434   /* send it on its way */
1435   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1436   /* do local part */
1437   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1438   /* receive remote parts: note this assumes the values are not actually */
1439   /* inserted in yy until the next line, which is true for my implementation*/
1440   /* but is not perhaps always true. */
1441   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 /*
1446   This only works correctly for square matrices where the subblock A->A is the
1447    diagonal block
1448 */
1449 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1450 {
1451   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1452   PetscErrorCode ierr;
1453 
1454   PetscFunctionBegin;
1455   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1456   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1461 {
1462   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1463   PetscErrorCode ierr;
1464 
1465   PetscFunctionBegin;
1466   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1467   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1472 {
1473   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1474   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1475   PetscErrorCode ierr;
1476   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1477   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1478   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1479 
1480   PetscFunctionBegin;
1481   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1482   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1483   mat->getrowactive = PETSC_TRUE;
1484 
1485   if (!mat->rowvalues && (idx || v)) {
1486     /*
1487         allocate enough space to hold information from the longest row.
1488     */
1489     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1490     PetscInt    max = 1,mbs = mat->mbs,tmp;
1491     for (i=0; i<mbs; i++) {
1492       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1493       if (max < tmp) max = tmp;
1494     }
1495     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1496   }
1497   lrow = row - brstart;
1498 
1499   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1500   if (!v)   {pvA = 0; pvB = 0;}
1501   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1502   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1503   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1504   nztot = nzA + nzB;
1505 
1506   cmap = mat->garray;
1507   if (v  || idx) {
1508     if (nztot) {
1509       /* Sort by increasing column numbers, assuming A and B already sorted */
1510       PetscInt imark = -1;
1511       if (v) {
1512         *v = v_p = mat->rowvalues;
1513         for (i=0; i<nzB; i++) {
1514           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1515           else break;
1516         }
1517         imark = i;
1518         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1519         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1520       }
1521       if (idx) {
1522         *idx = idx_p = mat->rowindices;
1523         if (imark > -1) {
1524           for (i=0; i<imark; i++) {
1525             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1526           }
1527         } else {
1528           for (i=0; i<nzB; i++) {
1529             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1530             else break;
1531           }
1532           imark = i;
1533         }
1534         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1535         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1536       }
1537     } else {
1538       if (idx) *idx = 0;
1539       if (v)   *v   = 0;
1540     }
1541   }
1542   *nz  = nztot;
1543   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1544   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1551 
1552   PetscFunctionBegin;
1553   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1554   baij->getrowactive = PETSC_FALSE;
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1559 {
1560   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1561   PetscErrorCode ierr;
1562 
1563   PetscFunctionBegin;
1564   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1565   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1566   PetscFunctionReturn(0);
1567 }
1568 
1569 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1570 {
1571   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1572   Mat            A  = a->A,B = a->B;
1573   PetscErrorCode ierr;
1574   PetscReal      isend[5],irecv[5];
1575 
1576   PetscFunctionBegin;
1577   info->block_size = (PetscReal)matin->rmap->bs;
1578 
1579   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1580 
1581   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1582   isend[3] = info->memory;  isend[4] = info->mallocs;
1583 
1584   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1585 
1586   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1587   isend[3] += info->memory;  isend[4] += info->mallocs;
1588 
1589   if (flag == MAT_LOCAL) {
1590     info->nz_used      = isend[0];
1591     info->nz_allocated = isend[1];
1592     info->nz_unneeded  = isend[2];
1593     info->memory       = isend[3];
1594     info->mallocs      = isend[4];
1595   } else if (flag == MAT_GLOBAL_MAX) {
1596     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1597 
1598     info->nz_used      = irecv[0];
1599     info->nz_allocated = irecv[1];
1600     info->nz_unneeded  = irecv[2];
1601     info->memory       = irecv[3];
1602     info->mallocs      = irecv[4];
1603   } else if (flag == MAT_GLOBAL_SUM) {
1604     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1605 
1606     info->nz_used      = irecv[0];
1607     info->nz_allocated = irecv[1];
1608     info->nz_unneeded  = irecv[2];
1609     info->memory       = irecv[3];
1610     info->mallocs      = irecv[4];
1611   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1612   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1613   info->fill_ratio_needed = 0;
1614   info->factor_mallocs    = 0;
1615   PetscFunctionReturn(0);
1616 }
1617 
1618 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1619 {
1620   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1621   PetscErrorCode ierr;
1622 
1623   PetscFunctionBegin;
1624   switch (op) {
1625   case MAT_NEW_NONZERO_LOCATIONS:
1626   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1627   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1628   case MAT_KEEP_NONZERO_PATTERN:
1629   case MAT_NEW_NONZERO_LOCATION_ERR:
1630     MatCheckPreallocated(A,1);
1631     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1632     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1633     break;
1634   case MAT_ROW_ORIENTED:
1635     MatCheckPreallocated(A,1);
1636     a->roworiented = flg;
1637 
1638     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1639     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1640     break;
1641   case MAT_NEW_DIAGONALS:
1642     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1643     break;
1644   case MAT_IGNORE_OFF_PROC_ENTRIES:
1645     a->donotstash = flg;
1646     break;
1647   case MAT_USE_HASH_TABLE:
1648     a->ht_flag = flg;
1649     a->ht_fact = 1.39;
1650     break;
1651   case MAT_SYMMETRIC:
1652   case MAT_STRUCTURALLY_SYMMETRIC:
1653   case MAT_HERMITIAN:
1654   case MAT_SUBMAT_SINGLEIS:
1655   case MAT_SYMMETRY_ETERNAL:
1656     MatCheckPreallocated(A,1);
1657     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1658     break;
1659   default:
1660     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1661   }
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1666 {
1667   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1668   Mat_SeqBAIJ    *Aloc;
1669   Mat            B;
1670   PetscErrorCode ierr;
1671   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1672   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1673   MatScalar      *a;
1674 
1675   PetscFunctionBegin;
1676   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1677     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1678     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1679     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1680     /* Do not know preallocation information, but must set block size */
1681     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1682   } else {
1683     B = *matout;
1684   }
1685 
1686   /* copy over the A part */
1687   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1688   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1689   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1690 
1691   for (i=0; i<mbs; i++) {
1692     rvals[0] = bs*(baij->rstartbs + i);
1693     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1694     for (j=ai[i]; j<ai[i+1]; j++) {
1695       col = (baij->cstartbs+aj[j])*bs;
1696       for (k=0; k<bs; k++) {
1697         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1698 
1699         col++; a += bs;
1700       }
1701     }
1702   }
1703   /* copy over the B part */
1704   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1705   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1706   for (i=0; i<mbs; i++) {
1707     rvals[0] = bs*(baij->rstartbs + i);
1708     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1709     for (j=ai[i]; j<ai[i+1]; j++) {
1710       col = baij->garray[aj[j]]*bs;
1711       for (k=0; k<bs; k++) {
1712         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1713         col++;
1714         a += bs;
1715       }
1716     }
1717   }
1718   ierr = PetscFree(rvals);CHKERRQ(ierr);
1719   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1720   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721 
1722   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1723   else {
1724     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1725   }
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1730 {
1731   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1732   Mat            a     = baij->A,b = baij->B;
1733   PetscErrorCode ierr;
1734   PetscInt       s1,s2,s3;
1735 
1736   PetscFunctionBegin;
1737   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1738   if (rr) {
1739     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1740     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1741     /* Overlap communication with computation. */
1742     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1743   }
1744   if (ll) {
1745     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1746     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1747     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1748   }
1749   /* scale  the diagonal block */
1750   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1751 
1752   if (rr) {
1753     /* Do a scatter end and then right scale the off-diagonal block */
1754     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1755     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1761 {
1762   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1763   PetscInt      *lrows;
1764   PetscInt       r, len;
1765   PetscBool      cong;
1766   PetscErrorCode ierr;
1767 
1768   PetscFunctionBegin;
1769   /* get locally owned rows */
1770   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1771   /* fix right hand side if needed */
1772   if (x && b) {
1773     const PetscScalar *xx;
1774     PetscScalar       *bb;
1775 
1776     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1777     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1778     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1779     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1780     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1781   }
1782 
1783   /* actually zap the local rows */
1784   /*
1785         Zero the required rows. If the "diagonal block" of the matrix
1786      is square and the user wishes to set the diagonal we use separate
1787      code so that MatSetValues() is not called for each diagonal allocating
1788      new memory, thus calling lots of mallocs and slowing things down.
1789 
1790   */
1791   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1792   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1793   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1794   if ((diag != 0.0) && cong) {
1795     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1796   } else if (diag != 0.0) {
1797     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1798     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1799        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1800     for (r = 0; r < len; ++r) {
1801       const PetscInt row = lrows[r] + A->rmap->rstart;
1802       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1803     }
1804     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1805     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1806   } else {
1807     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1808   }
1809   ierr = PetscFree(lrows);CHKERRQ(ierr);
1810 
1811   /* only change matrix nonzero state if pattern was allowed to be changed */
1812   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1813     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1814     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1815   }
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1820 {
1821   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1822   PetscErrorCode    ierr;
1823   PetscMPIInt       n = A->rmap->n;
1824   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1825   PetscInt          *lrows,*owners = A->rmap->range;
1826   PetscSFNode       *rrows;
1827   PetscSF           sf;
1828   const PetscScalar *xx;
1829   PetscScalar       *bb,*mask;
1830   Vec               xmask,lmask;
1831   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1832   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1833   PetscScalar       *aa;
1834 
1835   PetscFunctionBegin;
1836   /* Create SF where leaves are input rows and roots are owned rows */
1837   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1838   for (r = 0; r < n; ++r) lrows[r] = -1;
1839   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1840   for (r = 0; r < N; ++r) {
1841     const PetscInt idx   = rows[r];
1842     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1843     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1844       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1845     }
1846     rrows[r].rank  = p;
1847     rrows[r].index = rows[r] - owners[p];
1848   }
1849   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1850   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1851   /* Collect flags for rows to be zeroed */
1852   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1853   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1854   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1855   /* Compress and put in row numbers */
1856   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1857   /* zero diagonal part of matrix */
1858   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1859   /* handle off diagonal part of matrix */
1860   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1861   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1862   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1863   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1864   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1865   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1866   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1867   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1868   if (x) {
1869     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1870     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1871     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1872     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1873   }
1874   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1875   /* remove zeroed rows of off diagonal matrix */
1876   for (i = 0; i < len; ++i) {
1877     row   = lrows[i];
1878     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1879     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1880     for (k = 0; k < count; ++k) {
1881       aa[0] = 0.0;
1882       aa   += bs;
1883     }
1884   }
1885   /* loop over all elements of off process part of matrix zeroing removed columns*/
1886   for (i = 0; i < l->B->rmap->N; ++i) {
1887     row = i/bs;
1888     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1889       for (k = 0; k < bs; ++k) {
1890         col = bs*baij->j[j] + k;
1891         if (PetscAbsScalar(mask[col])) {
1892           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1893           if (x) bb[i] -= aa[0]*xx[col];
1894           aa[0] = 0.0;
1895         }
1896       }
1897     }
1898   }
1899   if (x) {
1900     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1901     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1902   }
1903   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1904   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1905   ierr = PetscFree(lrows);CHKERRQ(ierr);
1906 
1907   /* only change matrix nonzero state if pattern was allowed to be changed */
1908   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1909     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1910     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1911   }
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1916 {
1917   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1918   PetscErrorCode ierr;
1919 
1920   PetscFunctionBegin;
1921   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1926 
1927 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1928 {
1929   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1930   Mat            a,b,c,d;
1931   PetscBool      flg;
1932   PetscErrorCode ierr;
1933 
1934   PetscFunctionBegin;
1935   a = matA->A; b = matA->B;
1936   c = matB->A; d = matB->B;
1937 
1938   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1939   if (flg) {
1940     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1941   }
1942   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1947 {
1948   PetscErrorCode ierr;
1949   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1950   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1951 
1952   PetscFunctionBegin;
1953   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1954   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1955     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1956   } else {
1957     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1958     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1959   }
1960   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1965 {
1966   PetscErrorCode ierr;
1967 
1968   PetscFunctionBegin;
1969   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1974 {
1975   PetscErrorCode ierr;
1976   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1977   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1978   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1979 
1980   PetscFunctionBegin;
1981   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1986 {
1987   PetscErrorCode ierr;
1988   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1989   PetscBLASInt   bnz,one=1;
1990   Mat_SeqBAIJ    *x,*y;
1991   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
1992 
1993   PetscFunctionBegin;
1994   if (str == SAME_NONZERO_PATTERN) {
1995     PetscScalar alpha = a;
1996     x    = (Mat_SeqBAIJ*)xx->A->data;
1997     y    = (Mat_SeqBAIJ*)yy->A->data;
1998     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1999     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2000     x    = (Mat_SeqBAIJ*)xx->B->data;
2001     y    = (Mat_SeqBAIJ*)yy->B->data;
2002     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2003     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2004     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2005   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2006     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2007   } else {
2008     Mat      B;
2009     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2010     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2011     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2012     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2013     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2014     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2015     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2016     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2017     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2018     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2019     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2020     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2021     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2022     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2023     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2024     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2025   }
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2030 {
2031   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2032   PetscErrorCode ierr;
2033 
2034   PetscFunctionBegin;
2035   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2036   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2041 {
2042   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2043   PetscErrorCode ierr;
2044 
2045   PetscFunctionBegin;
2046   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2047   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2052 {
2053   PetscErrorCode ierr;
2054   IS             iscol_local;
2055   PetscInt       csize;
2056 
2057   PetscFunctionBegin;
2058   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2059   if (call == MAT_REUSE_MATRIX) {
2060     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2061     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2062   } else {
2063     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2064   }
2065   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2066   if (call == MAT_INITIAL_MATRIX) {
2067     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2068     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2069   }
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 /*
2074   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2075   in local and then by concatenating the local matrices the end result.
2076   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
2077   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2078 */
2079 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2080 {
2081   PetscErrorCode ierr;
2082   PetscMPIInt    rank,size;
2083   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2084   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2085   Mat            M,Mreuse;
2086   MatScalar      *vwork,*aa;
2087   MPI_Comm       comm;
2088   IS             isrow_new, iscol_new;
2089   Mat_SeqBAIJ    *aij;
2090 
2091   PetscFunctionBegin;
2092   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2093   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2094   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2095   /* The compression and expansion should be avoided. Doesn't point
2096      out errors, might change the indices, hence buggey */
2097   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2098   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2099 
2100   if (call ==  MAT_REUSE_MATRIX) {
2101     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2102     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2103     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
2104   } else {
2105     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
2106   }
2107   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2108   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2109   /*
2110       m - number of local rows
2111       n - number of columns (same on all processors)
2112       rstart - first row in new global matrix generated
2113   */
2114   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2115   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2116   m    = m/bs;
2117   n    = n/bs;
2118 
2119   if (call == MAT_INITIAL_MATRIX) {
2120     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2121     ii  = aij->i;
2122     jj  = aij->j;
2123 
2124     /*
2125         Determine the number of non-zeros in the diagonal and off-diagonal
2126         portions of the matrix in order to do correct preallocation
2127     */
2128 
2129     /* first get start and end of "diagonal" columns */
2130     if (csize == PETSC_DECIDE) {
2131       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2132       if (mglobal == n*bs) { /* square matrix */
2133         nlocal = m;
2134       } else {
2135         nlocal = n/size + ((n % size) > rank);
2136       }
2137     } else {
2138       nlocal = csize/bs;
2139     }
2140     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2141     rstart = rend - nlocal;
2142     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2143 
2144     /* next, compute all the lengths */
2145     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2146     for (i=0; i<m; i++) {
2147       jend = ii[i+1] - ii[i];
2148       olen = 0;
2149       dlen = 0;
2150       for (j=0; j<jend; j++) {
2151         if (*jj < rstart || *jj >= rend) olen++;
2152         else dlen++;
2153         jj++;
2154       }
2155       olens[i] = olen;
2156       dlens[i] = dlen;
2157     }
2158     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2159     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2160     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2161     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2162     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2163     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2164   } else {
2165     PetscInt ml,nl;
2166 
2167     M    = *newmat;
2168     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2169     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2170     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2171     /*
2172          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2173        rather than the slower MatSetValues().
2174     */
2175     M->was_assembled = PETSC_TRUE;
2176     M->assembled     = PETSC_FALSE;
2177   }
2178   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2179   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2180   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2181   ii   = aij->i;
2182   jj   = aij->j;
2183   aa   = aij->a;
2184   for (i=0; i<m; i++) {
2185     row   = rstart/bs + i;
2186     nz    = ii[i+1] - ii[i];
2187     cwork = jj;     jj += nz;
2188     vwork = aa;     aa += nz*bs*bs;
2189     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2190   }
2191 
2192   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2193   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2194   *newmat = M;
2195 
2196   /* save submatrix used in processor for next request */
2197   if (call ==  MAT_INITIAL_MATRIX) {
2198     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2199     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2200   }
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2205 {
2206   MPI_Comm       comm,pcomm;
2207   PetscInt       clocal_size,nrows;
2208   const PetscInt *rows;
2209   PetscMPIInt    size;
2210   IS             crowp,lcolp;
2211   PetscErrorCode ierr;
2212 
2213   PetscFunctionBegin;
2214   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2215   /* make a collective version of 'rowp' */
2216   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2217   if (pcomm==comm) {
2218     crowp = rowp;
2219   } else {
2220     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2221     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2222     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2223     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2224   }
2225   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2226   /* make a local version of 'colp' */
2227   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2228   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2229   if (size==1) {
2230     lcolp = colp;
2231   } else {
2232     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2233   }
2234   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2235   /* now we just get the submatrix */
2236   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2237   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2238   /* clean up */
2239   if (pcomm!=comm) {
2240     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2241   }
2242   if (size>1) {
2243     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2244   }
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2249 {
2250   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2251   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2252 
2253   PetscFunctionBegin;
2254   if (nghosts) *nghosts = B->nbs;
2255   if (ghosts) *ghosts = baij->garray;
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2260 {
2261   Mat            B;
2262   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2263   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2264   Mat_SeqAIJ     *b;
2265   PetscErrorCode ierr;
2266   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2267   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2268   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2269 
2270   PetscFunctionBegin;
2271   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2272   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2273 
2274   /* ----------------------------------------------------------------
2275      Tell every processor the number of nonzeros per row
2276   */
2277   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2278   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2279     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2280   }
2281   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2282   displs    = recvcounts + size;
2283   for (i=0; i<size; i++) {
2284     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2285     displs[i]     = A->rmap->range[i]/bs;
2286   }
2287 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2288   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2289 #else
2290   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2291   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2292 #endif
2293   /* ---------------------------------------------------------------
2294      Create the sequential matrix of the same type as the local block diagonal
2295   */
2296   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2297   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2298   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2299   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2300   b    = (Mat_SeqAIJ*)B->data;
2301 
2302   /*--------------------------------------------------------------------
2303     Copy my part of matrix column indices over
2304   */
2305   sendcount  = ad->nz + bd->nz;
2306   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2307   a_jsendbuf = ad->j;
2308   b_jsendbuf = bd->j;
2309   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2310   cnt        = 0;
2311   for (i=0; i<n; i++) {
2312 
2313     /* put in lower diagonal portion */
2314     m = bd->i[i+1] - bd->i[i];
2315     while (m > 0) {
2316       /* is it above diagonal (in bd (compressed) numbering) */
2317       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2318       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2319       m--;
2320     }
2321 
2322     /* put in diagonal portion */
2323     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2324       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2325     }
2326 
2327     /* put in upper diagonal portion */
2328     while (m-- > 0) {
2329       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2330     }
2331   }
2332   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2333 
2334   /*--------------------------------------------------------------------
2335     Gather all column indices to all processors
2336   */
2337   for (i=0; i<size; i++) {
2338     recvcounts[i] = 0;
2339     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2340       recvcounts[i] += lens[j];
2341     }
2342   }
2343   displs[0] = 0;
2344   for (i=1; i<size; i++) {
2345     displs[i] = displs[i-1] + recvcounts[i-1];
2346   }
2347 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2348   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2349 #else
2350   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2351 #endif
2352   /*--------------------------------------------------------------------
2353     Assemble the matrix into useable form (note numerical values not yet set)
2354   */
2355   /* set the b->ilen (length of each row) values */
2356   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2357   /* set the b->i indices */
2358   b->i[0] = 0;
2359   for (i=1; i<=A->rmap->N/bs; i++) {
2360     b->i[i] = b->i[i-1] + lens[i-1];
2361   }
2362   ierr = PetscFree(lens);CHKERRQ(ierr);
2363   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2364   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2365   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2366 
2367   if (A->symmetric) {
2368     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2369   } else if (A->hermitian) {
2370     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2371   } else if (A->structurally_symmetric) {
2372     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2373   }
2374   *newmat = B;
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2379 {
2380   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2381   PetscErrorCode ierr;
2382   Vec            bb1 = 0;
2383 
2384   PetscFunctionBegin;
2385   if (flag == SOR_APPLY_UPPER) {
2386     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2387     PetscFunctionReturn(0);
2388   }
2389 
2390   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2391     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2392   }
2393 
2394   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2395     if (flag & SOR_ZERO_INITIAL_GUESS) {
2396       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2397       its--;
2398     }
2399 
2400     while (its--) {
2401       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2402       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2403 
2404       /* update rhs: bb1 = bb - B*x */
2405       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2406       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2407 
2408       /* local sweep */
2409       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2410     }
2411   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2412     if (flag & SOR_ZERO_INITIAL_GUESS) {
2413       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2414       its--;
2415     }
2416     while (its--) {
2417       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2418       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2419 
2420       /* update rhs: bb1 = bb - B*x */
2421       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2422       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2423 
2424       /* local sweep */
2425       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2426     }
2427   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2428     if (flag & SOR_ZERO_INITIAL_GUESS) {
2429       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2430       its--;
2431     }
2432     while (its--) {
2433       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2434       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2435 
2436       /* update rhs: bb1 = bb - B*x */
2437       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2438       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2439 
2440       /* local sweep */
2441       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2442     }
2443   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2444 
2445   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2450 {
2451   PetscErrorCode ierr;
2452   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2453   PetscInt       N,i,*garray = aij->garray;
2454   PetscInt       ib,jb,bs = A->rmap->bs;
2455   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2456   MatScalar      *a_val = a_aij->a;
2457   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2458   MatScalar      *b_val = b_aij->a;
2459   PetscReal      *work;
2460 
2461   PetscFunctionBegin;
2462   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2463   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2464   if (type == NORM_2) {
2465     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2466       for (jb=0; jb<bs; jb++) {
2467         for (ib=0; ib<bs; ib++) {
2468           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2469           a_val++;
2470         }
2471       }
2472     }
2473     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2474       for (jb=0; jb<bs; jb++) {
2475         for (ib=0; ib<bs; ib++) {
2476           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2477           b_val++;
2478         }
2479       }
2480     }
2481   } else if (type == NORM_1) {
2482     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2483       for (jb=0; jb<bs; jb++) {
2484         for (ib=0; ib<bs; ib++) {
2485           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2486           a_val++;
2487         }
2488       }
2489     }
2490     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2491       for (jb=0; jb<bs; jb++) {
2492        for (ib=0; ib<bs; ib++) {
2493           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2494           b_val++;
2495         }
2496       }
2497     }
2498   } else if (type == NORM_INFINITY) {
2499     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2500       for (jb=0; jb<bs; jb++) {
2501         for (ib=0; ib<bs; ib++) {
2502           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2503           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2504           a_val++;
2505         }
2506       }
2507     }
2508     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2509       for (jb=0; jb<bs; jb++) {
2510         for (ib=0; ib<bs; ib++) {
2511           int col = garray[b_aij->j[i]] * bs + jb;
2512           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2513           b_val++;
2514         }
2515       }
2516     }
2517   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2518   if (type == NORM_INFINITY) {
2519     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2520   } else {
2521     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2522   }
2523   ierr = PetscFree(work);CHKERRQ(ierr);
2524   if (type == NORM_2) {
2525     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2526   }
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2531 {
2532   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2533   PetscErrorCode ierr;
2534 
2535   PetscFunctionBegin;
2536   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2537   A->factorerrortype             = a->A->factorerrortype;
2538   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2539   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2544 {
2545   PetscErrorCode ierr;
2546   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2547   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2548 
2549   PetscFunctionBegin;
2550   if (!Y->preallocated) {
2551     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2552   } else if (!aij->nz) {
2553     PetscInt nonew = aij->nonew;
2554     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2555     aij->nonew = nonew;
2556   }
2557   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2562 {
2563   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2568   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2569   if (d) {
2570     PetscInt rstart;
2571     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2572     *d += rstart/A->rmap->bs;
2573 
2574   }
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2579 {
2580   PetscFunctionBegin;
2581   *a = ((Mat_MPIBAIJ*)A->data)->A;
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 /* -------------------------------------------------------------------*/
2586 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2587                                        MatGetRow_MPIBAIJ,
2588                                        MatRestoreRow_MPIBAIJ,
2589                                        MatMult_MPIBAIJ,
2590                                 /* 4*/ MatMultAdd_MPIBAIJ,
2591                                        MatMultTranspose_MPIBAIJ,
2592                                        MatMultTransposeAdd_MPIBAIJ,
2593                                        0,
2594                                        0,
2595                                        0,
2596                                 /*10*/ 0,
2597                                        0,
2598                                        0,
2599                                        MatSOR_MPIBAIJ,
2600                                        MatTranspose_MPIBAIJ,
2601                                 /*15*/ MatGetInfo_MPIBAIJ,
2602                                        MatEqual_MPIBAIJ,
2603                                        MatGetDiagonal_MPIBAIJ,
2604                                        MatDiagonalScale_MPIBAIJ,
2605                                        MatNorm_MPIBAIJ,
2606                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2607                                        MatAssemblyEnd_MPIBAIJ,
2608                                        MatSetOption_MPIBAIJ,
2609                                        MatZeroEntries_MPIBAIJ,
2610                                 /*24*/ MatZeroRows_MPIBAIJ,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                 /*29*/ MatSetUp_MPIBAIJ,
2616                                        0,
2617                                        0,
2618                                        MatGetDiagonalBlock_MPIBAIJ,
2619                                        0,
2620                                 /*34*/ MatDuplicate_MPIBAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                 /*39*/ MatAXPY_MPIBAIJ,
2626                                        MatCreateSubMatrices_MPIBAIJ,
2627                                        MatIncreaseOverlap_MPIBAIJ,
2628                                        MatGetValues_MPIBAIJ,
2629                                        MatCopy_MPIBAIJ,
2630                                 /*44*/ 0,
2631                                        MatScale_MPIBAIJ,
2632                                        MatShift_MPIBAIJ,
2633                                        0,
2634                                        MatZeroRowsColumns_MPIBAIJ,
2635                                 /*49*/ 0,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2641                                        0,
2642                                        MatSetUnfactored_MPIBAIJ,
2643                                        MatPermute_MPIBAIJ,
2644                                        MatSetValuesBlocked_MPIBAIJ,
2645                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2646                                        MatDestroy_MPIBAIJ,
2647                                        MatView_MPIBAIJ,
2648                                        0,
2649                                        0,
2650                                 /*64*/ 0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*74*/ 0,
2661                                        MatFDColoringApply_BAIJ,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                 /*79*/ 0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        MatLoad_MPIBAIJ,
2670                                 /*84*/ 0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                 /*89*/ 0,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                 /*94*/ 0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                 /*99*/ 0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                 /*104*/0,
2691                                        MatRealPart_MPIBAIJ,
2692                                        MatImaginaryPart_MPIBAIJ,
2693                                        0,
2694                                        0,
2695                                 /*109*/0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        MatMissingDiagonal_MPIBAIJ,
2700                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2701                                        0,
2702                                        MatGetGhosts_MPIBAIJ,
2703                                        0,
2704                                        0,
2705                                 /*119*/0,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                        MatGetMultiProcBlock_MPIBAIJ,
2710                                 /*124*/0,
2711                                        MatGetColumnNorms_MPIBAIJ,
2712                                        MatInvertBlockDiagonal_MPIBAIJ,
2713                                        0,
2714                                        0,
2715                                /*129*/ 0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                /*134*/ 0,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                /*139*/ MatSetBlockSizes_Default,
2726                                        0,
2727                                        0,
2728                                        MatFDColoringSetUp_MPIXAIJ,
2729                                        0,
2730                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2731 };
2732 
2733 
2734 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2735 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2736 
2737 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2738 {
2739   PetscInt       m,rstart,cstart,cend;
2740   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2741   const PetscInt *JJ    =0;
2742   PetscScalar    *values=0;
2743   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2748   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2749   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2750   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2751   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2752   m      = B->rmap->n/bs;
2753   rstart = B->rmap->rstart/bs;
2754   cstart = B->cmap->rstart/bs;
2755   cend   = B->cmap->rend/bs;
2756 
2757   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2758   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2759   for (i=0; i<m; i++) {
2760     nz = ii[i+1] - ii[i];
2761     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2762     nz_max = PetscMax(nz_max,nz);
2763     dlen   = 0;
2764     olen   = 0;
2765     JJ     = jj + ii[i];
2766     for (j=0; j<nz; j++) {
2767       if (*JJ < cstart || *JJ >= cend) olen++;
2768       else dlen++;
2769       JJ++;
2770     }
2771     d_nnz[i] = dlen;
2772     o_nnz[i] = olen;
2773   }
2774   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2775   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2776 
2777   values = (PetscScalar*)V;
2778   if (!values) {
2779     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2780   }
2781   for (i=0; i<m; i++) {
2782     PetscInt          row    = i + rstart;
2783     PetscInt          ncols  = ii[i+1] - ii[i];
2784     const PetscInt    *icols = jj + ii[i];
2785     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2786       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2787       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2788     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2789       PetscInt j;
2790       for (j=0; j<ncols; j++) {
2791         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2792         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2793       }
2794     }
2795   }
2796 
2797   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2798   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2799   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2800   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 /*@C
2805    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2806    (the default parallel PETSc format).
2807 
2808    Collective on MPI_Comm
2809 
2810    Input Parameters:
2811 +  B - the matrix
2812 .  bs - the block size
2813 .  i - the indices into j for the start of each local row (starts with zero)
2814 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2815 -  v - optional values in the matrix
2816 
2817    Level: developer
2818 
2819    Notes:
2820     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2821    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2822    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2823    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2824    block column and the second index is over columns within a block.
2825 
2826 .keywords: matrix, aij, compressed row, sparse, parallel
2827 
2828 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2829 @*/
2830 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2831 {
2832   PetscErrorCode ierr;
2833 
2834   PetscFunctionBegin;
2835   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2836   PetscValidType(B,1);
2837   PetscValidLogicalCollectiveInt(B,bs,2);
2838   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2843 {
2844   Mat_MPIBAIJ    *b;
2845   PetscErrorCode ierr;
2846   PetscInt       i;
2847 
2848   PetscFunctionBegin;
2849   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2850   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2851   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2852   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2853 
2854   if (d_nnz) {
2855     for (i=0; i<B->rmap->n/bs; i++) {
2856       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2857     }
2858   }
2859   if (o_nnz) {
2860     for (i=0; i<B->rmap->n/bs; i++) {
2861       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2862     }
2863   }
2864 
2865   b      = (Mat_MPIBAIJ*)B->data;
2866   b->bs2 = bs*bs;
2867   b->mbs = B->rmap->n/bs;
2868   b->nbs = B->cmap->n/bs;
2869   b->Mbs = B->rmap->N/bs;
2870   b->Nbs = B->cmap->N/bs;
2871 
2872   for (i=0; i<=b->size; i++) {
2873     b->rangebs[i] = B->rmap->range[i]/bs;
2874   }
2875   b->rstartbs = B->rmap->rstart/bs;
2876   b->rendbs   = B->rmap->rend/bs;
2877   b->cstartbs = B->cmap->rstart/bs;
2878   b->cendbs   = B->cmap->rend/bs;
2879 
2880 #if defined(PETSC_USE_CTABLE)
2881   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2882 #else
2883   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2884 #endif
2885   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2886   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2887   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2888 
2889   /* Because the B will have been resized we simply destroy it and create a new one each time */
2890   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2891   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2892   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2893   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2894   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2895 
2896   if (!B->preallocated) {
2897     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2898     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2899     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2900     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2901     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2902   }
2903 
2904   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2905   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2906   B->preallocated  = PETSC_TRUE;
2907   B->was_assembled = PETSC_FALSE;
2908   B->assembled     = PETSC_FALSE;
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2913 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2914 
2915 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2916 {
2917   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2918   PetscErrorCode ierr;
2919   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2920   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2921   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2922 
2923   PetscFunctionBegin;
2924   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
2925   ii[0] = 0;
2926   for (i=0; i<M; i++) {
2927     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2928     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2929     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2930     /* remove one from count of matrix has diagonal */
2931     for (j=id[i]; j<id[i+1]; j++) {
2932       if (jd[j] == i) {ii[i+1]--;break;}
2933     }
2934   }
2935   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2936   cnt  = 0;
2937   for (i=0; i<M; i++) {
2938     for (j=io[i]; j<io[i+1]; j++) {
2939       if (garray[jo[j]] > rstart) break;
2940       jj[cnt++] = garray[jo[j]];
2941     }
2942     for (k=id[i]; k<id[i+1]; k++) {
2943       if (jd[k] != i) {
2944         jj[cnt++] = rstart + jd[k];
2945       }
2946     }
2947     for (; j<io[i+1]; j++) {
2948       jj[cnt++] = garray[jo[j]];
2949     }
2950   }
2951   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2952   PetscFunctionReturn(0);
2953 }
2954 
2955 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2956 
2957 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2958 
2959 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2960 {
2961   PetscErrorCode ierr;
2962   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2963   Mat            B;
2964   Mat_MPIAIJ     *b;
2965 
2966   PetscFunctionBegin;
2967   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2968 
2969   if (reuse == MAT_REUSE_MATRIX) {
2970     B = *newmat;
2971   } else {
2972     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2973     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2974     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2975     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2976     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2977     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2978   }
2979   b = (Mat_MPIAIJ*) B->data;
2980 
2981   if (reuse == MAT_REUSE_MATRIX) {
2982     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
2983     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
2984   } else {
2985     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2986     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2987     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2988     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2989     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2990     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2991     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2992   }
2993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2995 
2996   if (reuse == MAT_INPLACE_MATRIX) {
2997     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
2998   } else {
2999    *newmat = B;
3000   }
3001   PetscFunctionReturn(0);
3002 }
3003 
3004 /*MC
3005    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3006 
3007    Options Database Keys:
3008 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3009 . -mat_block_size <bs> - set the blocksize used to store the matrix
3010 - -mat_use_hash_table <fact>
3011 
3012   Level: beginner
3013 
3014 .seealso: MatCreateMPIBAIJ
3015 M*/
3016 
3017 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3018 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
3019 
3020 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3021 {
3022   Mat_MPIBAIJ    *b;
3023   PetscErrorCode ierr;
3024   PetscBool      flg = PETSC_FALSE;
3025 
3026   PetscFunctionBegin;
3027   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3028   B->data = (void*)b;
3029 
3030   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3031   B->assembled = PETSC_FALSE;
3032 
3033   B->insertmode = NOT_SET_VALUES;
3034   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3035   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3036 
3037   /* build local table of row and column ownerships */
3038   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
3039 
3040   /* build cache for off array entries formed */
3041   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3042 
3043   b->donotstash  = PETSC_FALSE;
3044   b->colmap      = NULL;
3045   b->garray      = NULL;
3046   b->roworiented = PETSC_TRUE;
3047 
3048   /* stuff used in block assembly */
3049   b->barray = 0;
3050 
3051   /* stuff used for matrix vector multiply */
3052   b->lvec  = 0;
3053   b->Mvctx = 0;
3054 
3055   /* stuff for MatGetRow() */
3056   b->rowindices   = 0;
3057   b->rowvalues    = 0;
3058   b->getrowactive = PETSC_FALSE;
3059 
3060   /* hash table stuff */
3061   b->ht           = 0;
3062   b->hd           = 0;
3063   b->ht_size      = 0;
3064   b->ht_flag      = PETSC_FALSE;
3065   b->ht_fact      = 0;
3066   b->ht_total_ct  = 0;
3067   b->ht_insert_ct = 0;
3068 
3069   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
3070   b->ijonly = PETSC_FALSE;
3071 
3072 
3073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3076 #if defined(PETSC_HAVE_HYPRE)
3077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3078 #endif
3079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3087   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3088 
3089   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3090   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
3091   if (flg) {
3092     PetscReal fact = 1.39;
3093     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3094     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3095     if (fact <= 1.0) fact = 1.39;
3096     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3097     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3098   }
3099   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3100   PetscFunctionReturn(0);
3101 }
3102 
3103 /*MC
3104    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3105 
3106    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3107    and MATMPIBAIJ otherwise.
3108 
3109    Options Database Keys:
3110 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3111 
3112   Level: beginner
3113 
3114 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3115 M*/
3116 
3117 /*@C
3118    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3119    (block compressed row).  For good matrix assembly performance
3120    the user should preallocate the matrix storage by setting the parameters
3121    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3122    performance can be increased by more than a factor of 50.
3123 
3124    Collective on Mat
3125 
3126    Input Parameters:
3127 +  B - the matrix
3128 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3129           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3130 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3131            submatrix  (same for all local rows)
3132 .  d_nnz - array containing the number of block nonzeros in the various block rows
3133            of the in diagonal portion of the local (possibly different for each block
3134            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3135            set it even if it is zero.
3136 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3137            submatrix (same for all local rows).
3138 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3139            off-diagonal portion of the local submatrix (possibly different for
3140            each block row) or NULL.
3141 
3142    If the *_nnz parameter is given then the *_nz parameter is ignored
3143 
3144    Options Database Keys:
3145 +   -mat_block_size - size of the blocks to use
3146 -   -mat_use_hash_table <fact>
3147 
3148    Notes:
3149    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3150    than it must be used on all processors that share the object for that argument.
3151 
3152    Storage Information:
3153    For a square global matrix we define each processor's diagonal portion
3154    to be its local rows and the corresponding columns (a square submatrix);
3155    each processor's off-diagonal portion encompasses the remainder of the
3156    local matrix (a rectangular submatrix).
3157 
3158    The user can specify preallocated storage for the diagonal part of
3159    the local submatrix with either d_nz or d_nnz (not both).  Set
3160    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3161    memory allocation.  Likewise, specify preallocated storage for the
3162    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3163 
3164    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3165    the figure below we depict these three local rows and all columns (0-11).
3166 
3167 .vb
3168            0 1 2 3 4 5 6 7 8 9 10 11
3169           --------------------------
3170    row 3  |o o o d d d o o o o  o  o
3171    row 4  |o o o d d d o o o o  o  o
3172    row 5  |o o o d d d o o o o  o  o
3173           --------------------------
3174 .ve
3175 
3176    Thus, any entries in the d locations are stored in the d (diagonal)
3177    submatrix, and any entries in the o locations are stored in the
3178    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3179    stored simply in the MATSEQBAIJ format for compressed row storage.
3180 
3181    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3182    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3183    In general, for PDE problems in which most nonzeros are near the diagonal,
3184    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3185    or you will get TERRIBLE performance; see the users' manual chapter on
3186    matrices.
3187 
3188    You can call MatGetInfo() to get information on how effective the preallocation was;
3189    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3190    You can also run with the option -info and look for messages with the string
3191    malloc in them to see if additional memory allocation was needed.
3192 
3193    Level: intermediate
3194 
3195 .keywords: matrix, block, aij, compressed row, sparse, parallel
3196 
3197 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3198 @*/
3199 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3200 {
3201   PetscErrorCode ierr;
3202 
3203   PetscFunctionBegin;
3204   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3205   PetscValidType(B,1);
3206   PetscValidLogicalCollectiveInt(B,bs,2);
3207   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3208   PetscFunctionReturn(0);
3209 }
3210 
3211 /*@C
3212    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3213    (block compressed row).  For good matrix assembly performance
3214    the user should preallocate the matrix storage by setting the parameters
3215    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3216    performance can be increased by more than a factor of 50.
3217 
3218    Collective on MPI_Comm
3219 
3220    Input Parameters:
3221 +  comm - MPI communicator
3222 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3223           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3224 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3225            This value should be the same as the local size used in creating the
3226            y vector for the matrix-vector product y = Ax.
3227 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3228            This value should be the same as the local size used in creating the
3229            x vector for the matrix-vector product y = Ax.
3230 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3231 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3232 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3233            submatrix  (same for all local rows)
3234 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3235            of the in diagonal portion of the local (possibly different for each block
3236            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3237            and set it even if it is zero.
3238 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3239            submatrix (same for all local rows).
3240 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3241            off-diagonal portion of the local submatrix (possibly different for
3242            each block row) or NULL.
3243 
3244    Output Parameter:
3245 .  A - the matrix
3246 
3247    Options Database Keys:
3248 +   -mat_block_size - size of the blocks to use
3249 -   -mat_use_hash_table <fact>
3250 
3251    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3252    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3253    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3254 
3255    Notes:
3256    If the *_nnz parameter is given then the *_nz parameter is ignored
3257 
3258    A nonzero block is any block that as 1 or more nonzeros in it
3259 
3260    The user MUST specify either the local or global matrix dimensions
3261    (possibly both).
3262 
3263    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3264    than it must be used on all processors that share the object for that argument.
3265 
3266    Storage Information:
3267    For a square global matrix we define each processor's diagonal portion
3268    to be its local rows and the corresponding columns (a square submatrix);
3269    each processor's off-diagonal portion encompasses the remainder of the
3270    local matrix (a rectangular submatrix).
3271 
3272    The user can specify preallocated storage for the diagonal part of
3273    the local submatrix with either d_nz or d_nnz (not both).  Set
3274    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3275    memory allocation.  Likewise, specify preallocated storage for the
3276    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3277 
3278    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3279    the figure below we depict these three local rows and all columns (0-11).
3280 
3281 .vb
3282            0 1 2 3 4 5 6 7 8 9 10 11
3283           --------------------------
3284    row 3  |o o o d d d o o o o  o  o
3285    row 4  |o o o d d d o o o o  o  o
3286    row 5  |o o o d d d o o o o  o  o
3287           --------------------------
3288 .ve
3289 
3290    Thus, any entries in the d locations are stored in the d (diagonal)
3291    submatrix, and any entries in the o locations are stored in the
3292    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3293    stored simply in the MATSEQBAIJ format for compressed row storage.
3294 
3295    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3296    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3297    In general, for PDE problems in which most nonzeros are near the diagonal,
3298    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3299    or you will get TERRIBLE performance; see the users' manual chapter on
3300    matrices.
3301 
3302    Level: intermediate
3303 
3304 .keywords: matrix, block, aij, compressed row, sparse, parallel
3305 
3306 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3307 @*/
3308 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3309 {
3310   PetscErrorCode ierr;
3311   PetscMPIInt    size;
3312 
3313   PetscFunctionBegin;
3314   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3315   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3316   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3317   if (size > 1) {
3318     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3319     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3320   } else {
3321     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3322     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3323   }
3324   PetscFunctionReturn(0);
3325 }
3326 
3327 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3328 {
3329   Mat            mat;
3330   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3331   PetscErrorCode ierr;
3332   PetscInt       len=0;
3333 
3334   PetscFunctionBegin;
3335   *newmat = 0;
3336   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3337   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3338   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3339   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3340 
3341   mat->factortype   = matin->factortype;
3342   mat->preallocated = PETSC_TRUE;
3343   mat->assembled    = PETSC_TRUE;
3344   mat->insertmode   = NOT_SET_VALUES;
3345 
3346   a             = (Mat_MPIBAIJ*)mat->data;
3347   mat->rmap->bs = matin->rmap->bs;
3348   a->bs2        = oldmat->bs2;
3349   a->mbs        = oldmat->mbs;
3350   a->nbs        = oldmat->nbs;
3351   a->Mbs        = oldmat->Mbs;
3352   a->Nbs        = oldmat->Nbs;
3353 
3354   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3355   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3356 
3357   a->size         = oldmat->size;
3358   a->rank         = oldmat->rank;
3359   a->donotstash   = oldmat->donotstash;
3360   a->roworiented  = oldmat->roworiented;
3361   a->rowindices   = 0;
3362   a->rowvalues    = 0;
3363   a->getrowactive = PETSC_FALSE;
3364   a->barray       = 0;
3365   a->rstartbs     = oldmat->rstartbs;
3366   a->rendbs       = oldmat->rendbs;
3367   a->cstartbs     = oldmat->cstartbs;
3368   a->cendbs       = oldmat->cendbs;
3369 
3370   /* hash table stuff */
3371   a->ht           = 0;
3372   a->hd           = 0;
3373   a->ht_size      = 0;
3374   a->ht_flag      = oldmat->ht_flag;
3375   a->ht_fact      = oldmat->ht_fact;
3376   a->ht_total_ct  = 0;
3377   a->ht_insert_ct = 0;
3378 
3379   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3380   if (oldmat->colmap) {
3381 #if defined(PETSC_USE_CTABLE)
3382     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3383 #else
3384     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3385     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3386     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3387 #endif
3388   } else a->colmap = 0;
3389 
3390   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3391     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3392     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3393     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3394   } else a->garray = 0;
3395 
3396   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3397   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3398   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3399   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3400   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3401 
3402   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3403   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3404   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3405   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3406   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3407   *newmat = mat;
3408   PetscFunctionReturn(0);
3409 }
3410 
3411 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3412 {
3413   PetscErrorCode ierr;
3414   int            fd;
3415   PetscInt       i,nz,j,rstart,rend;
3416   PetscScalar    *vals,*buf;
3417   MPI_Comm       comm;
3418   MPI_Status     status;
3419   PetscMPIInt    rank,size,maxnz;
3420   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3421   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3422   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3423   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3424   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3425   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3426 
3427   PetscFunctionBegin;
3428   /* force binary viewer to load .info file if it has not yet done so */
3429   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3430   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3431   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3432   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3433   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3434   if (bs < 0) bs = 1;
3435 
3436   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3437   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3438   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3439   if (!rank) {
3440     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3441     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3442     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3443   }
3444   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3445   M    = header[1]; N = header[2];
3446 
3447   /* If global sizes are set, check if they are consistent with that given in the file */
3448   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
3449   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
3450 
3451   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3452 
3453   /*
3454      This code adds extra rows to make sure the number of rows is
3455      divisible by the blocksize
3456   */
3457   Mbs        = M/bs;
3458   extra_rows = bs - M + bs*Mbs;
3459   if (extra_rows == bs) extra_rows = 0;
3460   else                  Mbs++;
3461   if (extra_rows && !rank) {
3462     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3463   }
3464 
3465   /* determine ownership of all rows */
3466   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3467     mbs = Mbs/size + ((Mbs % size) > rank);
3468     m   = mbs*bs;
3469   } else { /* User set */
3470     m   = newmat->rmap->n;
3471     mbs = m/bs;
3472   }
3473   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3474   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3475 
3476   /* process 0 needs enough room for process with most rows */
3477   if (!rank) {
3478     mmax = rowners[1];
3479     for (i=2; i<=size; i++) {
3480       mmax = PetscMax(mmax,rowners[i]);
3481     }
3482     mmax*=bs;
3483   } else mmax = -1;             /* unused, but compiler warns anyway */
3484 
3485   rowners[0] = 0;
3486   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3487   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3488   rstart = rowners[rank];
3489   rend   = rowners[rank+1];
3490 
3491   /* distribute row lengths to all processors */
3492   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3493   if (!rank) {
3494     mend = m;
3495     if (size == 1) mend = mend - extra_rows;
3496     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3497     for (j=mend; j<m; j++) locrowlens[j] = 1;
3498     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3499     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3500     for (j=0; j<m; j++) {
3501       procsnz[0] += locrowlens[j];
3502     }
3503     for (i=1; i<size; i++) {
3504       mend = browners[i+1] - browners[i];
3505       if (i == size-1) mend = mend - extra_rows;
3506       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3507       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3508       /* calculate the number of nonzeros on each processor */
3509       for (j=0; j<browners[i+1]-browners[i]; j++) {
3510         procsnz[i] += rowlengths[j];
3511       }
3512       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3513     }
3514     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3515   } else {
3516     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3517   }
3518 
3519   if (!rank) {
3520     /* determine max buffer needed and allocate it */
3521     maxnz = procsnz[0];
3522     for (i=1; i<size; i++) {
3523       maxnz = PetscMax(maxnz,procsnz[i]);
3524     }
3525     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3526 
3527     /* read in my part of the matrix column indices  */
3528     nz     = procsnz[0];
3529     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3530     mycols = ibuf;
3531     if (size == 1) nz -= extra_rows;
3532     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3533     if (size == 1) {
3534       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3535     }
3536 
3537     /* read in every ones (except the last) and ship off */
3538     for (i=1; i<size-1; i++) {
3539       nz   = procsnz[i];
3540       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3541       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3542     }
3543     /* read in the stuff for the last proc */
3544     if (size != 1) {
3545       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3546       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3547       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3548       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3549     }
3550     ierr = PetscFree(cols);CHKERRQ(ierr);
3551   } else {
3552     /* determine buffer space needed for message */
3553     nz = 0;
3554     for (i=0; i<m; i++) {
3555       nz += locrowlens[i];
3556     }
3557     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3558     mycols = ibuf;
3559     /* receive message of column indices*/
3560     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3561     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3562     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3563   }
3564 
3565   /* loop over local rows, determining number of off diagonal entries */
3566   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3567   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3568   rowcount = 0; nzcount = 0;
3569   for (i=0; i<mbs; i++) {
3570     dcount  = 0;
3571     odcount = 0;
3572     for (j=0; j<bs; j++) {
3573       kmax = locrowlens[rowcount];
3574       for (k=0; k<kmax; k++) {
3575         tmp = mycols[nzcount++]/bs;
3576         if (!mask[tmp]) {
3577           mask[tmp] = 1;
3578           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3579           else masked1[dcount++] = tmp;
3580         }
3581       }
3582       rowcount++;
3583     }
3584 
3585     dlens[i]  = dcount;
3586     odlens[i] = odcount;
3587 
3588     /* zero out the mask elements we set */
3589     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3590     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3591   }
3592 
3593   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3594   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3595 
3596   if (!rank) {
3597     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3598     /* read in my part of the matrix numerical values  */
3599     nz     = procsnz[0];
3600     vals   = buf;
3601     mycols = ibuf;
3602     if (size == 1) nz -= extra_rows;
3603     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3604     if (size == 1) {
3605       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3606     }
3607 
3608     /* insert into matrix */
3609     jj = rstart*bs;
3610     for (i=0; i<m; i++) {
3611       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3612       mycols += locrowlens[i];
3613       vals   += locrowlens[i];
3614       jj++;
3615     }
3616     /* read in other processors (except the last one) and ship out */
3617     for (i=1; i<size-1; i++) {
3618       nz   = procsnz[i];
3619       vals = buf;
3620       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3621       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3622     }
3623     /* the last proc */
3624     if (size != 1) {
3625       nz   = procsnz[i] - extra_rows;
3626       vals = buf;
3627       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3628       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3629       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3630     }
3631     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3632   } else {
3633     /* receive numeric values */
3634     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3635 
3636     /* receive message of values*/
3637     vals   = buf;
3638     mycols = ibuf;
3639     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3640 
3641     /* insert into matrix */
3642     jj = rstart*bs;
3643     for (i=0; i<m; i++) {
3644       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3645       mycols += locrowlens[i];
3646       vals   += locrowlens[i];
3647       jj++;
3648     }
3649   }
3650   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3651   ierr = PetscFree(buf);CHKERRQ(ierr);
3652   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3653   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3654   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3655   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3656   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3657   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3658   PetscFunctionReturn(0);
3659 }
3660 
3661 /*@
3662    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3663 
3664    Input Parameters:
3665 .  mat  - the matrix
3666 .  fact - factor
3667 
3668    Not Collective, each process can use a different factor
3669 
3670    Level: advanced
3671 
3672   Notes:
3673    This can also be set by the command line option: -mat_use_hash_table <fact>
3674 
3675 .keywords: matrix, hashtable, factor, HT
3676 
3677 .seealso: MatSetOption()
3678 @*/
3679 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3680 {
3681   PetscErrorCode ierr;
3682 
3683   PetscFunctionBegin;
3684   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3685   PetscFunctionReturn(0);
3686 }
3687 
3688 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3689 {
3690   Mat_MPIBAIJ *baij;
3691 
3692   PetscFunctionBegin;
3693   baij          = (Mat_MPIBAIJ*)mat->data;
3694   baij->ht_fact = fact;
3695   PetscFunctionReturn(0);
3696 }
3697 
3698 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3699 {
3700   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3701   PetscBool      flg;
3702   PetscErrorCode ierr;
3703 
3704   PetscFunctionBegin;
3705   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3706   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3707   if (Ad)     *Ad     = a->A;
3708   if (Ao)     *Ao     = a->B;
3709   if (colmap) *colmap = a->garray;
3710   PetscFunctionReturn(0);
3711 }
3712 
3713 /*
3714     Special version for direct calls from Fortran (to eliminate two function call overheads
3715 */
3716 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3717 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3718 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3719 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3720 #endif
3721 
3722 /*@C
3723   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3724 
3725   Collective on Mat
3726 
3727   Input Parameters:
3728 + mat - the matrix
3729 . min - number of input rows
3730 . im - input rows
3731 . nin - number of input columns
3732 . in - input columns
3733 . v - numerical values input
3734 - addvin - INSERT_VALUES or ADD_VALUES
3735 
3736   Notes:
3737     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3738 
3739   Level: advanced
3740 
3741 .seealso:   MatSetValuesBlocked()
3742 @*/
3743 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3744 {
3745   /* convert input arguments to C version */
3746   Mat        mat  = *matin;
3747   PetscInt   m    = *min, n = *nin;
3748   InsertMode addv = *addvin;
3749 
3750   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3751   const MatScalar *value;
3752   MatScalar       *barray     = baij->barray;
3753   PetscBool       roworiented = baij->roworiented;
3754   PetscErrorCode  ierr;
3755   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3756   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3757   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3758 
3759   PetscFunctionBegin;
3760   /* tasks normally handled by MatSetValuesBlocked() */
3761   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3762 #if defined(PETSC_USE_DEBUG)
3763   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3764   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3765 #endif
3766   if (mat->assembled) {
3767     mat->was_assembled = PETSC_TRUE;
3768     mat->assembled     = PETSC_FALSE;
3769   }
3770   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3771 
3772 
3773   if (!barray) {
3774     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3775     baij->barray = barray;
3776   }
3777 
3778   if (roworiented) stepval = (n-1)*bs;
3779   else stepval = (m-1)*bs;
3780 
3781   for (i=0; i<m; i++) {
3782     if (im[i] < 0) continue;
3783 #if defined(PETSC_USE_DEBUG)
3784     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3785 #endif
3786     if (im[i] >= rstart && im[i] < rend) {
3787       row = im[i] - rstart;
3788       for (j=0; j<n; j++) {
3789         /* If NumCol = 1 then a copy is not required */
3790         if ((roworiented) && (n == 1)) {
3791           barray = (MatScalar*)v + i*bs2;
3792         } else if ((!roworiented) && (m == 1)) {
3793           barray = (MatScalar*)v + j*bs2;
3794         } else { /* Here a copy is required */
3795           if (roworiented) {
3796             value = v + i*(stepval+bs)*bs + j*bs;
3797           } else {
3798             value = v + j*(stepval+bs)*bs + i*bs;
3799           }
3800           for (ii=0; ii<bs; ii++,value+=stepval) {
3801             for (jj=0; jj<bs; jj++) {
3802               *barray++ = *value++;
3803             }
3804           }
3805           barray -=bs2;
3806         }
3807 
3808         if (in[j] >= cstart && in[j] < cend) {
3809           col  = in[j] - cstart;
3810           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3811         } else if (in[j] < 0) continue;
3812 #if defined(PETSC_USE_DEBUG)
3813         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3814 #endif
3815         else {
3816           if (mat->was_assembled) {
3817             if (!baij->colmap) {
3818               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3819             }
3820 
3821 #if defined(PETSC_USE_DEBUG)
3822 #if defined(PETSC_USE_CTABLE)
3823             { PetscInt data;
3824               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3825               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3826             }
3827 #else
3828             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3829 #endif
3830 #endif
3831 #if defined(PETSC_USE_CTABLE)
3832             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3833             col  = (col - 1)/bs;
3834 #else
3835             col = (baij->colmap[in[j]] - 1)/bs;
3836 #endif
3837             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3838               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3839               col  =  in[j];
3840             }
3841           } else col = in[j];
3842           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3843         }
3844       }
3845     } else {
3846       if (!baij->donotstash) {
3847         if (roworiented) {
3848           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3849         } else {
3850           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3851         }
3852       }
3853     }
3854   }
3855 
3856   /* task normally handled by MatSetValuesBlocked() */
3857   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3858   PetscFunctionReturn(0);
3859 }
3860 
3861 /*@
3862      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3863          CSR format the local rows.
3864 
3865    Collective on MPI_Comm
3866 
3867    Input Parameters:
3868 +  comm - MPI communicator
3869 .  bs - the block size, only a block size of 1 is supported
3870 .  m - number of local rows (Cannot be PETSC_DECIDE)
3871 .  n - This value should be the same as the local size used in creating the
3872        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3873        calculated if N is given) For square matrices n is almost always m.
3874 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3875 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3876 .   i - row indices
3877 .   j - column indices
3878 -   a - matrix values
3879 
3880    Output Parameter:
3881 .   mat - the matrix
3882 
3883    Level: intermediate
3884 
3885    Notes:
3886        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3887      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3888      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3889 
3890      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3891      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3892      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3893      with column-major ordering within blocks.
3894 
3895        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3896 
3897 .keywords: matrix, aij, compressed row, sparse, parallel
3898 
3899 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3900           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3901 @*/
3902 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3903 {
3904   PetscErrorCode ierr;
3905 
3906   PetscFunctionBegin;
3907   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3908   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3909   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3910   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3911   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3912   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3913   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3914   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3915   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3916   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3917   PetscFunctionReturn(0);
3918 }
3919 
3920 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3921 {
3922   PetscErrorCode ierr;
3923   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3924   PetscInt       *indx;
3925   PetscScalar    *values;
3926 
3927   PetscFunctionBegin;
3928   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3929   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3930     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3931     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3932     PetscInt       *bindx,rmax=a->rmax,j;
3933     PetscMPIInt    rank,size;
3934 
3935     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3936     mbs = m/bs; Nbs = N/cbs;
3937     if (n == PETSC_DECIDE) {
3938       nbs  = n;
3939       ierr = PetscSplitOwnership(comm,&nbs,&Nbs);CHKERRQ(ierr);
3940       n    = nbs*cbs;
3941     } else {
3942       nbs = n/cbs;
3943     }
3944 
3945     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3946     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3947 
3948     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3949     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3950     if (rank == size-1) {
3951       /* Check sum(nbs) = Nbs */
3952       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3953     }
3954 
3955     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3956     for (i=0; i<mbs; i++) {
3957       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3958       nnz = nnz/bs;
3959       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3960       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3961       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3962     }
3963     ierr = PetscFree(bindx);CHKERRQ(ierr);
3964 
3965     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3966     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3967     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3968     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3969     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3970     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3971     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3972   }
3973 
3974   /* numeric phase */
3975   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3976   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3977 
3978   for (i=0; i<m; i++) {
3979     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3980     Ii   = i + rstart;
3981     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3982     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3983   }
3984   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3985   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3986   PetscFunctionReturn(0);
3987 }
3988