/* xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 7f489da91baee7cfbedb679c2d7b0bca5dd3dd5b) */

#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the number of block columns of the global matrix.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
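
/*
   Illustration (hypothetical values): with bs = 2 and garray = {3,7,9} the
   loop above gives colmap[3] = 1, colmap[7] = 3 and colmap[9] = 5; that is,
   global block column garray[i] maps to local point offset i*bs, stored
   shifted up by one so that a stored value of 0 means "not present".
*/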
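
/*
   The two macros below insert a single value at point location (row,col) of
   the local diagonal (A) or off-diagonal (B) block.  Each converts the point
   indices to a block index, locates the block column with a truncated binary
   search followed by a linear scan, and, when the block is not yet present,
   shifts the later blocks of the row up to make room for it.
*/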
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
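
/*
   Usage sketch (illustration only, not part of the source): values are
   addressed by global point indices; rows owned by another process are
   stashed and communicated during assembly.

     PetscScalar one = 1.0;
     PetscInt    row = 10,col = 12;                       (global point indices)
     ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/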

PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  low = 0;
  high = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine should be optimized so that the block copy at the
    "Here a copy is required" branch below is not needed, by passing
    additional stride information into MatSetValuesBlocked_SeqBAIJ_Inlined()
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block column (row oriented) or one block row (column oriented), a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
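
/*
   Usage sketch (illustration only, assuming bs = 2): indices are block
   indices, and v holds a dense bs by bs block, row oriented by default.

     PetscScalar vblock[4] = {1.0,2.0,3.0,4.0};
     PetscInt    brow = 3,bcol = 5;                       (global block indices)
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vblock,INSERT_VALUES);CHKERRQ(ierr);
*/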

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
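/*
   Worked example of the multiplicative hash above: with key = 100 and
   size = 1000, tmp = 100*0.6180339887 = 61.80339887, so the slot is
   (PetscInt)(1000*0.80339887) = 803.  Collisions are resolved by the
   linear probing done in the routines below.
*/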
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
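
/*
   Usage sketch (illustration only): only values in locally owned rows can
   be retrieved, again with global point indices.

     PetscScalar val;
     PetscInt    row = 10,col = 12;                       (row must be local)
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/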

PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->cstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and initializes the hash table. The table is created only once;
  if new entries need to be added to the matrix, the hash table has to be
  destroyed and recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average search = %5.2f, max search = %D\n",(!j) ? 0.0 : ((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
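
/*
   Note: insertion above and lookup in MatSetValues_MPIBAIJ_HT() and
   MatSetValuesBlocked_MPIBAIJ_HT() both resolve collisions by linear
   probing with wraparound; sizing the table as 'factor' times the number
   of nonzero blocks keeps the probe sequences short (the average search
   length is reported when PETSc is run with -info).
*/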

PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive values belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to PETSC_FALSE, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive values belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change, then we know that
     no processor disassembled, so we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in the matrix, then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to participate in drawing the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for all rows, not just one per block row, since
     process 0 does not have the information needed to reconstruct them for each row from only a block
     row. This does require more communication, but still not more than the communication needed for
     the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1365 
1366 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1367 {
1368   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1369   PetscErrorCode ierr;
1370   PetscInt       nt;
1371 
1372   PetscFunctionBegin;
1373   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1374   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1375   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1376   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1377   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1378   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1379   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1380   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383 
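/*
   Editorial sketch (not part of the original source): MatMult_MPIBAIJ() above
   computes y = A_d*x + B_o*x_ghost, overlapping the scatter of ghost values
   with the diagonal-block multiply.  A minimal driver:
*/
static PetscErrorCode MatBAIJExample_Apply(Mat A)
{
  PetscErrorCode ierr;
  Vec            x,y;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr); /* x in the column layout, y in the row layout */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);         /* ends up in MatMult_MPIBAIJ() */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
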
1384 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1385 {
1386   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1387   PetscErrorCode ierr;
1388 
1389   PetscFunctionBegin;
1390   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1391   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1392   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1393   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401   PetscBool      merged;
1402 
1403   PetscFunctionBegin;
1404   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1405   /* do nondiagonal part */
1406   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1407   if (!merged) {
1408     /* send it on its way */
1409     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1410     /* do local part */
1411     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1412     /* receive remote parts: note this assumes the values are not actually */
1413     /* inserted in yy until the next line */
1414     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1415   } else {
1416     /* do local part */
1417     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1418     /* send it on its way */
1419     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1420     /* the values were actually received in the Begin() but we still need to call this no-op End() */
1421     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1427 {
1428   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1429   PetscErrorCode ierr;
1430 
1431   PetscFunctionBegin;
1432   /* do nondiagonal part */
1433   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1434   /* send it on its way */
1435   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1436   /* do local part */
1437   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1438   /* receive remote parts: note this assumes the values are not actually */
1439   /* inserted in zz until the next line, which is true for this implementation */
1440   /* but is perhaps not always true. */
1441   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 /*
1446   This only works correctly for square matrices where the subblock A->A is the
1447    diagonal block
1448 */
1449 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1450 {
1451   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1452   PetscErrorCode ierr;
1453 
1454   PetscFunctionBegin;
1455   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1456   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1461 {
1462   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1463   PetscErrorCode ierr;
1464 
1465   PetscFunctionBegin;
1466   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1467   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1472 {
1473   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1474   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1475   PetscErrorCode ierr;
1476   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1477   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1478   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1479 
1480   PetscFunctionBegin;
1481   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1482   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1483   mat->getrowactive = PETSC_TRUE;
1484 
1485   if (!mat->rowvalues && (idx || v)) {
1486     /*
1487         allocate enough space to hold information from the longest row.
1488     */
1489     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1490     PetscInt    max = 1,mbs = mat->mbs,tmp;
1491     for (i=0; i<mbs; i++) {
1492       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1493       if (max < tmp) max = tmp;
1494     }
1495     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1496   }
1497   lrow = row - brstart;
1498 
1499   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1500   if (!v)   {pvA = 0; pvB = 0;}
1501   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1502   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1503   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1504   nztot = nzA + nzB;
1505 
1506   cmap = mat->garray;
1507   if (v  || idx) {
1508     if (nztot) {
1509       /* Sort by increasing column numbers, assuming A and B already sorted */
1510       PetscInt imark = -1;
1511       if (v) {
1512         *v = v_p = mat->rowvalues;
1513         for (i=0; i<nzB; i++) {
1514           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1515           else break;
1516         }
1517         imark = i;
1518         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1519         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1520       }
1521       if (idx) {
1522         *idx = idx_p = mat->rowindices;
1523         if (imark > -1) {
1524           for (i=0; i<imark; i++) {
1525             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1526           }
1527         } else {
1528           for (i=0; i<nzB; i++) {
1529             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1530             else break;
1531           }
1532           imark = i;
1533         }
1534         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1535         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1536       }
1537     } else {
1538       if (idx) *idx = 0;
1539       if (v)   *v   = 0;
1540     }
1541   }
1542   *nz  = nztot;
1543   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1544   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
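/*
   Editorial sketch (not part of the original source): the intended calling
   pattern for the MatGetRow()/MatRestoreRow() pair implemented above; only
   locally owned rows may be requested.
*/
static PetscErrorCode MatBAIJExample_ScanRows(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,nz;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect the nz (column,value) pairs of this global row ... */
    ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
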
1548 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1551 
1552   PetscFunctionBegin;
1553   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1554   baij->getrowactive = PETSC_FALSE;
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1559 {
1560   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1561   PetscErrorCode ierr;
1562 
1563   PetscFunctionBegin;
1564   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1565   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1566   PetscFunctionReturn(0);
1567 }
1568 
1569 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1570 {
1571   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1572   Mat            A  = a->A,B = a->B;
1573   PetscErrorCode ierr;
1574   PetscReal      isend[5],irecv[5];
1575 
1576   PetscFunctionBegin;
1577   info->block_size = (PetscReal)matin->rmap->bs;
1578 
1579   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1580 
1581   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1582   isend[3] = info->memory;  isend[4] = info->mallocs;
1583 
1584   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1585 
1586   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1587   isend[3] += info->memory;  isend[4] += info->mallocs;
1588 
1589   if (flag == MAT_LOCAL) {
1590     info->nz_used      = isend[0];
1591     info->nz_allocated = isend[1];
1592     info->nz_unneeded  = isend[2];
1593     info->memory       = isend[3];
1594     info->mallocs      = isend[4];
1595   } else if (flag == MAT_GLOBAL_MAX) {
1596     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1597 
1598     info->nz_used      = irecv[0];
1599     info->nz_allocated = irecv[1];
1600     info->nz_unneeded  = irecv[2];
1601     info->memory       = irecv[3];
1602     info->mallocs      = irecv[4];
1603   } else if (flag == MAT_GLOBAL_SUM) {
1604     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1605 
1606     info->nz_used      = irecv[0];
1607     info->nz_allocated = irecv[1];
1608     info->nz_unneeded  = irecv[2];
1609     info->memory       = irecv[3];
1610     info->mallocs      = irecv[4];
1611   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1612   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1613   info->fill_ratio_needed = 0;
1614   info->factor_mallocs    = 0;
1615   PetscFunctionReturn(0);
1616 }
1617 
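/*
   Editorial sketch (not part of the original source): querying the
   communicator-wide totals assembled by the MPIU_Allreduce() calls above.
*/
static PetscErrorCode MatBAIJExample_ReportInfo(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
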
1618 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1619 {
1620   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1621   PetscErrorCode ierr;
1622 
1623   PetscFunctionBegin;
1624   switch (op) {
1625   case MAT_NEW_NONZERO_LOCATIONS:
1626   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1627   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1628   case MAT_KEEP_NONZERO_PATTERN:
1629   case MAT_NEW_NONZERO_LOCATION_ERR:
1630     MatCheckPreallocated(A,1);
1631     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1632     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1633     break;
1634   case MAT_ROW_ORIENTED:
1635     MatCheckPreallocated(A,1);
1636     a->roworiented = flg;
1637 
1638     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1639     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1640     break;
1641   case MAT_NEW_DIAGONALS:
1642     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1643     break;
1644   case MAT_IGNORE_OFF_PROC_ENTRIES:
1645     a->donotstash = flg;
1646     break;
1647   case MAT_USE_HASH_TABLE:
1648     a->ht_flag = flg;
1649     a->ht_fact = 1.39;
1650     break;
1651   case MAT_SYMMETRIC:
1652   case MAT_STRUCTURALLY_SYMMETRIC:
1653   case MAT_HERMITIAN:
1654   case MAT_SUBMAT_SINGLEIS:
1655   case MAT_SYMMETRY_ETERNAL:
1656     MatCheckPreallocated(A,1);
1657     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1658     break;
1659   default:
1660     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1661   }
1662   PetscFunctionReturn(0);
1663 }
1664 
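/*
   Editorial sketch (not part of the original source): two of the options
   handled in the switch above, set from user code before MatSetValues().
*/
static PetscErrorCode MatBAIJExample_SetOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); /* sets a->donotstash */
  ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);          /* sets a->ht_flag */
  PetscFunctionReturn(0);
}
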
1665 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1666 {
1667   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1668   Mat_SeqBAIJ    *Aloc;
1669   Mat            B;
1670   PetscErrorCode ierr;
1671   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1672   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1673   MatScalar      *a;
1674 
1675   PetscFunctionBegin;
1676   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1677     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1678     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1679     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1680     /* Do not know preallocation information, but must set block size */
1681     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1682   } else {
1683     B = *matout;
1684   }
1685 
1686   /* copy over the A part */
1687   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1688   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1689   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1690 
1691   for (i=0; i<mbs; i++) {
1692     rvals[0] = bs*(baij->rstartbs + i);
1693     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1694     for (j=ai[i]; j<ai[i+1]; j++) {
1695       col = (baij->cstartbs+aj[j])*bs;
1696       for (k=0; k<bs; k++) {
1697         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1698 
1699         col++; a += bs;
1700       }
1701     }
1702   }
1703   /* copy over the B part */
1704   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1705   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1706   for (i=0; i<mbs; i++) {
1707     rvals[0] = bs*(baij->rstartbs + i);
1708     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1709     for (j=ai[i]; j<ai[i+1]; j++) {
1710       col = baij->garray[aj[j]]*bs;
1711       for (k=0; k<bs; k++) {
1712         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1713         col++;
1714         a += bs;
1715       }
1716     }
1717   }
1718   ierr = PetscFree(rvals);CHKERRQ(ierr);
1719   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1720   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721 
1722   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1723   else {
1724     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1725   }
1726   PetscFunctionReturn(0);
1727 }
1728 
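/*
   Editorial sketch (not part of the original source): the MatReuse variants
   accepted by MatTranspose_MPIBAIJ() above.
*/
static PetscErrorCode MatBAIJExample_Transpose(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* allocates At */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);  /* in place, via MatHeaderMerge() */
  PetscFunctionReturn(0);
}
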
1729 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1730 {
1731   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1732   Mat            a     = baij->A,b = baij->B;
1733   PetscErrorCode ierr;
1734   PetscInt       s1,s2,s3;
1735 
1736   PetscFunctionBegin;
1737   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1738   if (rr) {
1739     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1740     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1741     /* Overlap communication with computation. */
1742     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1743   }
1744   if (ll) {
1745     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1746     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1747     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1748   }
1749   /* scale the diagonal block */
1750   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1751 
1752   if (rr) {
1753     /* Do a scatter end and then right scale the off-diagonal block */
1754     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1755     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
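/*
   Editorial sketch (not part of the original source): left Jacobi scaling,
   A <- diag(1/d)*A, built on the routine above; assumes a nonzero diagonal.
*/
static PetscErrorCode MatBAIJExample_JacobiScale(Mat A)
{
  PetscErrorCode ierr;
  Vec            d;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);   /* vector in the row layout */
  ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
  ierr = VecReciprocal(d);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,d,NULL);CHKERRQ(ierr); /* left scaling only */
  ierr = VecDestroy(&d);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
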
1760 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1761 {
1762   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1763   PetscInt      *lrows;
1764   PetscInt       r, len;
1765   PetscBool      cong;
1766   PetscErrorCode ierr;
1767 
1768   PetscFunctionBegin;
1769   /* get locally owned rows */
1770   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1771   /* fix right hand side if needed */
1772   if (x && b) {
1773     const PetscScalar *xx;
1774     PetscScalar       *bb;
1775 
1776     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1777     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1778     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1779     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1780     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1781   }
1782 
1783   /* actually zap the local rows */
1784   /*
1785         Zero the required rows. If the "diagonal block" of the matrix
1786      is square and the user wishes to set the diagonal we use separate
1787      code so that MatSetValues() is not called for each diagonal entry,
1788      which would allocate new memory, triggering many mallocs and slowing things down.
1789 
1790   */
1791   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1792   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1793   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1794   if ((diag != 0.0) && cong) {
1795     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1796   } else if (diag != 0.0) {
1797     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1798     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1799        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1800     for (r = 0; r < len; ++r) {
1801       const PetscInt row = lrows[r] + A->rmap->rstart;
1802       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1803     }
1804     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1805     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1806   } else {
1807     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1808   }
1809   ierr = PetscFree(lrows);CHKERRQ(ierr);
1810 
1811   /* only change matrix nonzero state if pattern was allowed to be changed */
1812   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1813     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1814     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1815   }
1816   PetscFunctionReturn(0);
1817 }
1818 
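/*
   Editorial sketch (not part of the original source): imposing a Dirichlet
   condition with the routine above.  Rows are global indices; each rank's
   list is mapped to the owning processes internally.
*/
static PetscErrorCode MatBAIJExample_ZeroDirichletRow(Mat A,Vec x,Vec b)
{
  PetscErrorCode ierr;
  PetscInt       rows[1] = {0}; /* hypothetical boundary row */

  PetscFunctionBegin;
  /* zero row 0, place 1.0 on its diagonal, and fix b so that b[0] = x[0] */
  ierr = MatZeroRows(A,1,rows,1.0,x,b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
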
1819 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1820 {
1821   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1822   PetscErrorCode    ierr;
1823   PetscMPIInt       n = A->rmap->n;
1824   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1825   PetscInt          *lrows,*owners = A->rmap->range;
1826   PetscSFNode       *rrows;
1827   PetscSF           sf;
1828   const PetscScalar *xx;
1829   PetscScalar       *bb,*mask;
1830   Vec               xmask,lmask;
1831   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1832   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1833   PetscScalar       *aa;
1834 
1835   PetscFunctionBegin;
1836   /* Create SF where leaves are input rows and roots are owned rows */
1837   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1838   for (r = 0; r < n; ++r) lrows[r] = -1;
1839   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1840   for (r = 0; r < N; ++r) {
1841     const PetscInt idx   = rows[r];
1842     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1843     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1844       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1845     }
1846     rrows[r].rank  = p;
1847     rrows[r].index = rows[r] - owners[p];
1848   }
1849   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1850   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1851   /* Collect flags for rows to be zeroed */
1852   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1853   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1854   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1855   /* Compress and put in row numbers */
1856   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1857   /* zero diagonal part of matrix */
1858   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1859   /* handle off diagonal part of matrix */
1860   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1861   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1862   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1863   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1864   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1865   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1866   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1867   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1868   if (x) {
1869     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1870     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1871     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1872     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1873   }
1874   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1875   /* remove zeroed rows of off diagonal matrix */
1876   for (i = 0; i < len; ++i) {
1877     row   = lrows[i];
1878     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1879     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1880     for (k = 0; k < count; ++k) {
1881       aa[0] = 0.0;
1882       aa   += bs;
1883     }
1884   }
1885   /* loop over all elements of off process part of matrix zeroing removed columns*/
1886   for (i = 0; i < l->B->rmap->N; ++i) {
1887     row = i/bs;
1888     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1889       for (k = 0; k < bs; ++k) {
1890         col = bs*baij->j[j] + k;
1891         if (PetscAbsScalar(mask[col])) {
1892           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1893           if (x) bb[i] -= aa[0]*xx[col];
1894           aa[0] = 0.0;
1895         }
1896       }
1897     }
1898   }
1899   if (x) {
1900     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1901     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1902   }
1903   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1904   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1905   ierr = PetscFree(lrows);CHKERRQ(ierr);
1906 
1907   /* only change matrix nonzero state if pattern was allowed to be changed */
1908   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1909     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1910     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1911   }
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1916 {
1917   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1918   PetscErrorCode ierr;
1919 
1920   PetscFunctionBegin;
1921   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1926 
1927 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1928 {
1929   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1930   Mat            a,b,c,d;
1931   PetscBool      flg;
1932   PetscErrorCode ierr;
1933 
1934   PetscFunctionBegin;
1935   a = matA->A; b = matA->B;
1936   c = matB->A; d = matB->B;
1937 
1938   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1939   if (flg) {
1940     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1941   }
1942   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1947 {
1948   PetscErrorCode ierr;
1949   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1950   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1951 
1952   PetscFunctionBegin;
1953   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1954   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1955     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1956   } else {
1957     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1958     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1959   }
1960   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1965 {
1966   PetscErrorCode ierr;
1967 
1968   PetscFunctionBegin;
1969   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1974 {
1975   PetscErrorCode ierr;
1976   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1977   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1978   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1979 
1980   PetscFunctionBegin;
1981   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1986 {
1987   PetscErrorCode ierr;
1988   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1989   PetscBLASInt   bnz,one=1;
1990   Mat_SeqBAIJ    *x,*y;
1991   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
1992 
1993   PetscFunctionBegin;
1994   if (str == SAME_NONZERO_PATTERN) {
1995     PetscScalar alpha = a;
1996     x    = (Mat_SeqBAIJ*)xx->A->data;
1997     y    = (Mat_SeqBAIJ*)yy->A->data;
1998     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1999     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2000     x    = (Mat_SeqBAIJ*)xx->B->data;
2001     y    = (Mat_SeqBAIJ*)yy->B->data;
2002     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2003     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2004     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2005   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2006     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2007   } else {
2008     Mat      B;
2009     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2010     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2011     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2012     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2013     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2014     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2015     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2016     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2017     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2018     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2019     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2020     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2021     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2022     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2023     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2024     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2025   }
2026   PetscFunctionReturn(0);
2027 }
2028 
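/*
   Editorial sketch (not part of the original source): Y <- a*X + Y.  Passing
   SAME_NONZERO_PATTERN enables the fast BLASaxpy() path above; any other
   structure flag falls back to the (re)preallocating branches.
*/
static PetscErrorCode MatBAIJExample_AXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
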
2029 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2030 {
2031   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2032   PetscErrorCode ierr;
2033 
2034   PetscFunctionBegin;
2035   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2036   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2041 {
2042   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2043   PetscErrorCode ierr;
2044 
2045   PetscFunctionBegin;
2046   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2047   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2052 {
2053   PetscErrorCode ierr;
2054   IS             iscol_local;
2055   PetscInt       csize;
2056 
2057   PetscFunctionBegin;
2058   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2059   if (call == MAT_REUSE_MATRIX) {
2060     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2061     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2062   } else {
2063     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2064   }
2065   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2066   if (call == MAT_INITIAL_MATRIX) {
2067     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2068     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2069   }
2070   PetscFunctionReturn(0);
2071 }
2072 
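/*
   Editorial sketch (not part of the original source): extracting the locally
   owned diagonal part through the routine above.  For BAIJ the index sets
   must respect block boundaries, since the indices are compressed by bs.
*/
static PetscErrorCode MatBAIJExample_Submatrix(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;
  IS             isrow,iscol;
  Mat            S;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
  ierr = MatDestroy(&S);CHKERRQ(ierr);
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
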
2073 /*
2074   Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
2075   and then the end result obtained by concatenating the local matrices.
2076   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
2077   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2078 */
2079 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2080 {
2081   PetscErrorCode ierr;
2082   PetscMPIInt    rank,size;
2083   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2084   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2085   Mat            M,Mreuse;
2086   MatScalar      *vwork,*aa;
2087   MPI_Comm       comm;
2088   IS             isrow_new, iscol_new;
2089   Mat_SeqBAIJ    *aij;
2090 
2091   PetscFunctionBegin;
2092   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2093   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2094   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2095   /* The compression and expansion should be avoided. It does not point
2096      out errors and might change the indices, hence buggy */
2097   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2098   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2099 
2100   if (call ==  MAT_REUSE_MATRIX) {
2101     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2102     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2103     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
2104   } else {
2105     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
2106   }
2107   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2108   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2109   /*
2110       m - number of local rows
2111       n - number of columns (same on all processors)
2112       rstart - first row in new global matrix generated
2113   */
2114   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2115   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2116   m    = m/bs;
2117   n    = n/bs;
2118 
2119   if (call == MAT_INITIAL_MATRIX) {
2120     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2121     ii  = aij->i;
2122     jj  = aij->j;
2123 
2124     /*
2125         Determine the number of non-zeros in the diagonal and off-diagonal
2126         portions of the matrix in order to do correct preallocation
2127     */
2128 
2129     /* first get start and end of "diagonal" columns */
2130     if (csize == PETSC_DECIDE) {
2131       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2132       if (mglobal == n*bs) { /* square matrix */
2133         nlocal = m;
2134       } else {
2135         nlocal = n/size + ((n % size) > rank);
2136       }
2137     } else {
2138       nlocal = csize/bs;
2139     }
2140     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2141     rstart = rend - nlocal;
2142     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2143 
2144     /* next, compute all the lengths */
2145     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2146     for (i=0; i<m; i++) {
2147       jend = ii[i+1] - ii[i];
2148       olen = 0;
2149       dlen = 0;
2150       for (j=0; j<jend; j++) {
2151         if (*jj < rstart || *jj >= rend) olen++;
2152         else dlen++;
2153         jj++;
2154       }
2155       olens[i] = olen;
2156       dlens[i] = dlen;
2157     }
2158     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2159     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2160     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2161     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2162     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2163     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2164   } else {
2165     PetscInt ml,nl;
2166 
2167     M    = *newmat;
2168     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2169     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2170     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2171     /*
2172          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2173        rather than the slower MatSetValues().
2174     */
2175     M->was_assembled = PETSC_TRUE;
2176     M->assembled     = PETSC_FALSE;
2177   }
2178   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2179   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2180   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2181   ii   = aij->i;
2182   jj   = aij->j;
2183   aa   = aij->a;
2184   for (i=0; i<m; i++) {
2185     row   = rstart/bs + i;
2186     nz    = ii[i+1] - ii[i];
2187     cwork = jj;     jj += nz;
2188     vwork = aa;     aa += nz*bs*bs;
2189     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2190   }
2191 
2192   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2193   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2194   *newmat = M;
2195 
2196   /* save submatrix used in processor for next request */
2197   if (call ==  MAT_INITIAL_MATRIX) {
2198     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2199     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2200   }
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2205 {
2206   MPI_Comm       comm,pcomm;
2207   PetscInt       clocal_size,nrows;
2208   const PetscInt *rows;
2209   PetscMPIInt    size;
2210   IS             crowp,lcolp;
2211   PetscErrorCode ierr;
2212 
2213   PetscFunctionBegin;
2214   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2215   /* make a collective version of 'rowp' */
2216   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2217   if (pcomm==comm) {
2218     crowp = rowp;
2219   } else {
2220     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2221     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2222     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2223     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2224   }
2225   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2226   /* make a local version of 'colp' */
2227   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2228   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2229   if (size==1) {
2230     lcolp = colp;
2231   } else {
2232     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2233   }
2234   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2235   /* now we just get the submatrix */
2236   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2237   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2238   /* clean up */
2239   if (pcomm!=comm) {
2240     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2241   }
2242   if (size>1) {
2243     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2244   }
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2249 {
2250   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2251   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2252 
2253   PetscFunctionBegin;
2254   if (nghosts) *nghosts = B->nbs;
2255   if (ghosts) *ghosts = baij->garray;
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2260 {
2261   Mat            B;
2262   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2263   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2264   Mat_SeqAIJ     *b;
2265   PetscErrorCode ierr;
2266   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2267   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2268   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2269 
2270   PetscFunctionBegin;
2271   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2272   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2273 
2274   /* ----------------------------------------------------------------
2275      Tell every processor the number of nonzeros per row
2276   */
2277   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2278   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2279     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2280   }
2281   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2282   displs    = recvcounts + size;
2283   for (i=0; i<size; i++) {
2284     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2285     displs[i]     = A->rmap->range[i]/bs;
2286   }
2287 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2288   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2289 #else
2290   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2291   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2292 #endif
2293   /* ---------------------------------------------------------------
2294      Create the sequential matrix of the same type as the local block diagonal
2295   */
2296   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2297   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2298   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2299   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2300   b    = (Mat_SeqAIJ*)B->data;
2301 
2302   /*--------------------------------------------------------------------
2303     Copy my part of matrix column indices over
2304   */
2305   sendcount  = ad->nz + bd->nz;
2306   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2307   a_jsendbuf = ad->j;
2308   b_jsendbuf = bd->j;
2309   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2310   cnt        = 0;
2311   for (i=0; i<n; i++) {
2312 
2313     /* put in lower diagonal portion */
2314     m = bd->i[i+1] - bd->i[i];
2315     while (m > 0) {
2316       /* is it above diagonal (in bd (compressed) numbering) */
2317       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2318       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2319       m--;
2320     }
2321 
2322     /* put in diagonal portion */
2323     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2324       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2325     }
2326 
2327     /* put in upper diagonal portion */
2328     while (m-- > 0) {
2329       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2330     }
2331   }
2332   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2333 
2334   /*--------------------------------------------------------------------
2335     Gather all column indices to all processors
2336   */
2337   for (i=0; i<size; i++) {
2338     recvcounts[i] = 0;
2339     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2340       recvcounts[i] += lens[j];
2341     }
2342   }
2343   displs[0] = 0;
2344   for (i=1; i<size; i++) {
2345     displs[i] = displs[i-1] + recvcounts[i-1];
2346   }
2347 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2348   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2349 #else
2350   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2351 #endif
2352   /*--------------------------------------------------------------------
2353     Assemble the matrix into usable form (note the numerical values are not yet set)
2354   */
2355   /* set the b->ilen (length of each row) values */
2356   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2357   /* set the b->i indices */
2358   b->i[0] = 0;
2359   for (i=1; i<=A->rmap->N/bs; i++) {
2360     b->i[i] = b->i[i-1] + lens[i-1];
2361   }
2362   ierr = PetscFree(lens);CHKERRQ(ierr);
2363   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2364   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2365   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2366 
2367   if (A->symmetric) {
2368     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2369   } else if (A->hermitian) {
2370     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2371   } else if (A->structurally_symmetric) {
2372     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2373   }
2374   *newmat = B;
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2379 {
2380   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2381   PetscErrorCode ierr;
2382   Vec            bb1 = 0;
2383 
2384   PetscFunctionBegin;
2385   if (flag == SOR_APPLY_UPPER) {
2386     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2387     PetscFunctionReturn(0);
2388   }
2389 
2390   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2391     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2392   }
2393 
2394   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2395     if (flag & SOR_ZERO_INITIAL_GUESS) {
2396       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2397       its--;
2398     }
2399 
2400     while (its--) {
2401       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2402       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2403 
2404       /* update rhs: bb1 = bb - B*x */
2405       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2406       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2407 
2408       /* local sweep */
2409       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2410     }
2411   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2412     if (flag & SOR_ZERO_INITIAL_GUESS) {
2413       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2414       its--;
2415     }
2416     while (its--) {
2417       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2418       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2419 
2420       /* update rhs: bb1 = bb - B*x */
2421       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2422       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2423 
2424       /* local sweep */
2425       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2426     }
2427   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2428     if (flag & SOR_ZERO_INITIAL_GUESS) {
2429       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2430       its--;
2431     }
2432     while (its--) {
2433       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2434       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2435 
2436       /* update rhs: bb1 = bb - B*x */
2437       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2438       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2439 
2440       /* local sweep */
2441       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2442     }
2443   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2444 
2445   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2450 {
2451   PetscErrorCode ierr;
2452   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2453   PetscInt       N,i,*garray = aij->garray;
2454   PetscInt       ib,jb,bs = A->rmap->bs;
2455   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2456   MatScalar      *a_val = a_aij->a;
2457   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2458   MatScalar      *b_val = b_aij->a;
2459   PetscReal      *work;
2460 
2461   PetscFunctionBegin;
2462   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2463   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2464   if (type == NORM_2) {
2465     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2466       for (jb=0; jb<bs; jb++) {
2467         for (ib=0; ib<bs; ib++) {
2468           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2469           a_val++;
2470         }
2471       }
2472     }
2473     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2474       for (jb=0; jb<bs; jb++) {
2475         for (ib=0; ib<bs; ib++) {
2476           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2477           b_val++;
2478         }
2479       }
2480     }
2481   } else if (type == NORM_1) {
2482     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2483       for (jb=0; jb<bs; jb++) {
2484         for (ib=0; ib<bs; ib++) {
2485           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2486           a_val++;
2487         }
2488       }
2489     }
2490     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2491       for (jb=0; jb<bs; jb++) {
2492        for (ib=0; ib<bs; ib++) {
2493           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2494           b_val++;
2495         }
2496       }
2497     }
2498   } else if (type == NORM_INFINITY) {
2499     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2500       for (jb=0; jb<bs; jb++) {
2501         for (ib=0; ib<bs; ib++) {
2502           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2503           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2504           a_val++;
2505         }
2506       }
2507     }
2508     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2509       for (jb=0; jb<bs; jb++) {
2510         for (ib=0; ib<bs; ib++) {
2511           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2512           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2513           b_val++;
2514         }
2515       }
2516     }
2517   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2518   if (type == NORM_INFINITY) {
2519     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2520   } else {
2521     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2522   }
2523   ierr = PetscFree(work);CHKERRQ(ierr);
2524   if (type == NORM_2) {
2525     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2526   }
2527   PetscFunctionReturn(0);
2528 }
2529 
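/*
   Editorial sketch (not part of the original source): every rank receives all
   N column norms from the routine above, so the result array must have global
   length.
*/
static PetscErrorCode MatBAIJExample_ColumnNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       N;
  PetscReal      *norms;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
  ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
  ierr = PetscFree(norms);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
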
2530 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2531 {
2532   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2533   PetscErrorCode ierr;
2534 
2535   PetscFunctionBegin;
2536   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2537   A->factorerrortype             = a->A->factorerrortype;
2538   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2539   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2544 {
2545   PetscErrorCode ierr;
2546   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2547   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2548 
2549   PetscFunctionBegin;
2550   if (!Y->preallocated) {
2551     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2552   } else if (!aij->nz) {
2553     PetscInt nonew = aij->nonew;
2554     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2555     aij->nonew = nonew;
2556   }
2557   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
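/*
   Editorial sketch (not part of the original source): Y <- Y + a*I.  The
   routine above first ensures the diagonal block is preallocated so that
   MatShift_Basic() can insert the diagonal entries.
*/
static PetscErrorCode MatBAIJExample_Shift(Mat Y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatShift(Y,1.0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
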
2561 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2562 {
2563   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2568   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2569   if (d) {
2570     PetscInt rstart;
2571     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2572     *d += rstart/A->rmap->bs;
2573 
2574   }
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2579 {
2580   PetscFunctionBegin;
2581   *a = ((Mat_MPIBAIJ*)A->data)->A;
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 /* -------------------------------------------------------------------*/
2586 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2587                                        MatGetRow_MPIBAIJ,
2588                                        MatRestoreRow_MPIBAIJ,
2589                                        MatMult_MPIBAIJ,
2590                                 /* 4*/ MatMultAdd_MPIBAIJ,
2591                                        MatMultTranspose_MPIBAIJ,
2592                                        MatMultTransposeAdd_MPIBAIJ,
2593                                        0,
2594                                        0,
2595                                        0,
2596                                 /*10*/ 0,
2597                                        0,
2598                                        0,
2599                                        MatSOR_MPIBAIJ,
2600                                        MatTranspose_MPIBAIJ,
2601                                 /*15*/ MatGetInfo_MPIBAIJ,
2602                                        MatEqual_MPIBAIJ,
2603                                        MatGetDiagonal_MPIBAIJ,
2604                                        MatDiagonalScale_MPIBAIJ,
2605                                        MatNorm_MPIBAIJ,
2606                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2607                                        MatAssemblyEnd_MPIBAIJ,
2608                                        MatSetOption_MPIBAIJ,
2609                                        MatZeroEntries_MPIBAIJ,
2610                                 /*24*/ MatZeroRows_MPIBAIJ,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                 /*29*/ MatSetUp_MPIBAIJ,
2616                                        0,
2617                                        0,
2618                                        MatGetDiagonalBlock_MPIBAIJ,
2619                                        0,
2620                                 /*34*/ MatDuplicate_MPIBAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                 /*39*/ MatAXPY_MPIBAIJ,
2626                                        MatCreateSubMatrices_MPIBAIJ,
2627                                        MatIncreaseOverlap_MPIBAIJ,
2628                                        MatGetValues_MPIBAIJ,
2629                                        MatCopy_MPIBAIJ,
2630                                 /*44*/ 0,
2631                                        MatScale_MPIBAIJ,
2632                                        MatShift_MPIBAIJ,
2633                                        0,
2634                                        MatZeroRowsColumns_MPIBAIJ,
2635                                 /*49*/ 0,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2641                                        0,
2642                                        MatSetUnfactored_MPIBAIJ,
2643                                        MatPermute_MPIBAIJ,
2644                                        MatSetValuesBlocked_MPIBAIJ,
2645                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2646                                        MatDestroy_MPIBAIJ,
2647                                        MatView_MPIBAIJ,
2648                                        0,
2649                                        0,
2650                                 /*64*/ 0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*74*/ 0,
2661                                        MatFDColoringApply_BAIJ,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                 /*79*/ 0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        MatLoad_MPIBAIJ,
2670                                 /*84*/ 0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                 /*89*/ 0,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                 /*94*/ 0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                 /*99*/ 0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                 /*104*/0,
2691                                        MatRealPart_MPIBAIJ,
2692                                        MatImaginaryPart_MPIBAIJ,
2693                                        0,
2694                                        0,
2695                                 /*109*/0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        MatMissingDiagonal_MPIBAIJ,
2700                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2701                                        0,
2702                                        MatGetGhosts_MPIBAIJ,
2703                                        0,
2704                                        0,
2705                                 /*119*/0,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                        MatGetMultiProcBlock_MPIBAIJ,
2710                                 /*124*/0,
2711                                        MatGetColumnNorms_MPIBAIJ,
2712                                        MatInvertBlockDiagonal_MPIBAIJ,
2713                                        0,
2714                                        0,
2715                                /*129*/ 0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                /*134*/ 0,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                /*139*/ MatSetBlockSizes_Default,
2726                                        0,
2727                                        0,
2728                                        MatFDColoringSetUp_MPIXAIJ,
2729                                        0,
2730                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2731 };
2732 
2733 
2734 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2735 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2736 
2737 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2738 {
2739   PetscInt       m,rstart,cstart,cend;
2740   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2741   const PetscInt *JJ    =0;
2742   PetscScalar    *values=0;
2743   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2748   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2749   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2750   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2751   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2752   m      = B->rmap->n/bs;
2753   rstart = B->rmap->rstart/bs;
2754   cstart = B->cmap->rstart/bs;
2755   cend   = B->cmap->rend/bs;
2756 
2757   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2758   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2759   for (i=0; i<m; i++) {
2760     nz = ii[i+1] - ii[i];
2761     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2762     nz_max = PetscMax(nz_max,nz);
2763     dlen   = 0;
2764     olen   = 0;
2765     JJ     = jj + ii[i];
2766     for (j=0; j<nz; j++) {
2767       if (*JJ < cstart || *JJ >= cend) olen++;
2768       else dlen++;
2769       JJ++;
2770     }
2771     d_nnz[i] = dlen;
2772     o_nnz[i] = olen;
2773   }
2774   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2775   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2776 
2777   values = (PetscScalar*)V;
2778   if (!values) {
2779     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2780   }
2781   for (i=0; i<m; i++) {
2782     PetscInt          row    = i + rstart;
2783     PetscInt          ncols  = ii[i+1] - ii[i];
2784     const PetscInt    *icols = jj + ii[i];
2785     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2786       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2787       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2788     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2789       PetscInt j;
2790       for (j=0; j<ncols; j++) {
2791         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2792         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2793       }
2794     }
2795   }
2796 
2797   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2798   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2799   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2800   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 /*@C
2805    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2806    (block compressed row).
2807 
2808    Collective on MPI_Comm
2809 
2810    Input Parameters:
2811 +  B - the matrix
2812 .  bs - the block size
2813 .  i - the indices into j for the start of each local row (starts with zero)
2814 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2815 -  v - optional values in the matrix
2816 
2817    Level: developer
2818 
2819    Notes:
2820     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2821    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2822    over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2823    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2824    block and the second index is over columns within a block.
2825 
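   A minimal sketch (the arrays here are illustrative, not from a PETSc example): a process owning
   two block rows with bs=2, whose blocks lie in global block columns {0,1} and {1}, could call
.vb
   PetscInt    ii[] = {0,2,3};        /* block row pointers, ii[0] must be 0          */
   PetscInt    jj[] = {0,1,1};        /* global block column indices, sorted per row  */
   PetscScalar vv[3*2*2] = {0.0};     /* three 2x2 blocks of values                   */
   ierr = MatMPIBAIJSetPreallocationCSR(B,2,ii,jj,vv);CHKERRQ(ierr);
.ve
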
2826 .keywords: matrix, aij, compressed row, sparse, parallel
2827 
2828 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2829 @*/
2830 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2831 {
2832   PetscErrorCode ierr;
2833 
2834   PetscFunctionBegin;
2835   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2836   PetscValidType(B,1);
2837   PetscValidLogicalCollectiveInt(B,bs,2);
2838   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2843 {
2844   Mat_MPIBAIJ    *b;
2845   PetscErrorCode ierr;
2846   PetscInt       i;
2847 
2848   PetscFunctionBegin;
2849   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2850   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2851   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2852   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2853 
2854   if (d_nnz) {
2855     for (i=0; i<B->rmap->n/bs; i++) {
2856       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2857     }
2858   }
2859   if (o_nnz) {
2860     for (i=0; i<B->rmap->n/bs; i++) {
2861       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2862     }
2863   }
2864 
2865   b      = (Mat_MPIBAIJ*)B->data;
2866   b->bs2 = bs*bs;
2867   b->mbs = B->rmap->n/bs;
2868   b->nbs = B->cmap->n/bs;
2869   b->Mbs = B->rmap->N/bs;
2870   b->Nbs = B->cmap->N/bs;
2871 
2872   for (i=0; i<=b->size; i++) {
2873     b->rangebs[i] = B->rmap->range[i]/bs;
2874   }
2875   b->rstartbs = B->rmap->rstart/bs;
2876   b->rendbs   = B->rmap->rend/bs;
2877   b->cstartbs = B->cmap->rstart/bs;
2878   b->cendbs   = B->cmap->rend/bs;
2879 
2880 #if defined(PETSC_USE_CTABLE)
2881   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2882 #else
2883   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2884 #endif
2885   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2886   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2887   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2888 
2889   /* Because the off-diagonal matrix b->B will have been resized we simply destroy it and create a new one each time */
2890   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2891   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2892   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2893   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2894   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2895 
2896   if (!B->preallocated) {
2897     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2898     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2899     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2900     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2901     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2902   }
2903 
2904   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2905   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2906   B->preallocated  = PETSC_TRUE;
2907   B->was_assembled = PETSC_FALSE;
2908   B->assembled     = PETSC_FALSE;
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2913 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2914 
2915 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2916 {
2917   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2918   PetscErrorCode ierr;
2919   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2920   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2921   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2922 
2923   PetscFunctionBegin;
2924   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
2925   ii[0] = 0;
2926   for (i=0; i<M; i++) {
2927     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2928     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2929     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2930     /* remove one from the count if this row contains the diagonal entry */
2931     for (j=id[i]; j<id[i+1]; j++) {
2932       if (jd[j] == i) {ii[i+1]--;break;}
2933     }
2934   }
2935   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2936   cnt  = 0;
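  /* merge each block row in ascending column order: off-process block columns below rstart first,
     then this process's diagonal-block columns (skipping the diagonal entry itself), then the
     remaining off-process block columns */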
2937   for (i=0; i<M; i++) {
2938     for (j=io[i]; j<io[i+1]; j++) {
2939       if (garray[jo[j]] > rstart) break;
2940       jj[cnt++] = garray[jo[j]];
2941     }
2942     for (k=id[i]; k<id[i+1]; k++) {
2943       if (jd[k] != i) {
2944         jj[cnt++] = rstart + jd[k];
2945       }
2946     }
2947     for (; j<io[i+1]; j++) {
2948       jj[cnt++] = garray[jo[j]];
2949     }
2950   }
2951   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2952   PetscFunctionReturn(0);
2953 }
2954 
2955 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2956 
2957 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2958 
2959 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2960 {
2961   PetscErrorCode ierr;
2962   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2963   Mat            B;
2964   Mat_MPIAIJ     *b;
2965 
2966   PetscFunctionBegin;
2967   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2968 
2969   if (reuse == MAT_REUSE_MATRIX) {
2970     B = *newmat;
2971   } else {
2972     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2973     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2974     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2975     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2976     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2977     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2978   }
2979   b = (Mat_MPIAIJ*) B->data;
2980 
2981   if (reuse == MAT_REUSE_MATRIX) {
2982     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
2983     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
2984   } else {
2985     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2986     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
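    /* MatDisAssemble_MPIBAIJ() returns a->B to global column numbering so the sequential
       conversions below produce correct column indices; A is reassembled afterwards */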
2987     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2988     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2989     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2990     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2991     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2992   }
2993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2995 
2996   if (reuse == MAT_INPLACE_MATRIX) {
2997     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
2998   } else {
2999    *newmat = B;
3000   }
3001   PetscFunctionReturn(0);
3002 }
3003 
3004 /*MC
3005    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3006 
3007    Options Database Keys:
3008 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3009 . -mat_block_size <bs> - set the blocksize used to store the matrix
3010 - -mat_use_hash_table <fact> - use a hash table to save time in constructing the matrix, with size factor <fact>
3011 
3012   Level: beginner
3013 
3014 .seealso: MatCreateBAIJ()
3015 M*/
3016 
3017 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3018 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
3019 
3020 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3021 {
3022   Mat_MPIBAIJ    *b;
3023   PetscErrorCode ierr;
3024   PetscBool      flg = PETSC_FALSE;
3025 
3026   PetscFunctionBegin;
3027   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3028   B->data = (void*)b;
3029 
3030   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3031   B->assembled = PETSC_FALSE;
3032 
3033   B->insertmode = NOT_SET_VALUES;
3034   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3035   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3036 
3037   /* build local table of row and column ownerships */
3038   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
3039 
3040   /* build cache for off array entries formed */
3041   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3042 
3043   b->donotstash  = PETSC_FALSE;
3044   b->colmap      = NULL;
3045   b->garray      = NULL;
3046   b->roworiented = PETSC_TRUE;
3047 
3048   /* stuff used in block assembly */
3049   b->barray = 0;
3050 
3051   /* stuff used for matrix vector multiply */
3052   b->lvec  = 0;
3053   b->Mvctx = 0;
3054 
3055   /* stuff for MatGetRow() */
3056   b->rowindices   = 0;
3057   b->rowvalues    = 0;
3058   b->getrowactive = PETSC_FALSE;
3059 
3060   /* hash table stuff */
3061   b->ht           = 0;
3062   b->hd           = 0;
3063   b->ht_size      = 0;
3064   b->ht_flag      = PETSC_FALSE;
3065   b->ht_fact      = 0;
3066   b->ht_total_ct  = 0;
3067   b->ht_insert_ct = 0;
3068 
3069   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
3070   b->ijonly = PETSC_FALSE;
3071 
3072 
3073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3076 #if defined(PETSC_HAVE_HYPRE)
3077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3078 #endif
3079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3087   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3088 
3089   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3090   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
3091   if (flg) {
3092     PetscReal fact = 1.39;
3093     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3094     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3095     if (fact <= 1.0) fact = 1.39;
3096     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3097     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3098   }
3099   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3100   PetscFunctionReturn(0);
3101 }
3102 
3103 /*MC
3104    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3105 
3106    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3107    and MATMPIBAIJ otherwise.
3108 
3109    Options Database Keys:
3110 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3111 
3112   Level: beginner
3113 
3114 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3115 M*/
3116 
3117 /*@C
3118    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3119    (block compressed row).  For good matrix assembly performance
3120    the user should preallocate the matrix storage by setting the parameters
3121    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3122    performance can be increased by more than a factor of 50.
3123 
3124    Collective on Mat
3125 
3126    Input Parameters:
3127 +  B - the matrix
3128 .  bs   - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
3129           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3130 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3131            submatrix  (same for all local rows)
3132 .  d_nnz - array containing the number of block nonzeros in the various block rows
3133            in the diagonal portion of the local submatrix (possibly different for each block
3134            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3135            set it even if it is zero.
3136 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3137            submatrix (same for all local rows).
3138 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3139            off-diagonal portion of the local submatrix (possibly different for
3140            each block row) or NULL.
3141 
3142    If the *_nnz parameter is given then the *_nz parameter is ignored
3143 
3144    Options Database Keys:
3145 +   -mat_block_size - size of the blocks to use
3146 -   -mat_use_hash_table <fact> - use a hash table to save time in constructing the matrix, with size factor <fact>
3147 
3148    Notes:
3149    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3150    then it must be used on all processors that share the object for that argument.
3151 
3152    Storage Information:
3153    For a square global matrix we define each processor's diagonal portion
3154    to be its local rows and the corresponding columns (a square submatrix);
3155    each processor's off-diagonal portion encompasses the remainder of the
3156    local matrix (a rectangular submatrix).
3157 
3158    The user can specify preallocated storage for the diagonal part of
3159    the local submatrix with either d_nz or d_nnz (not both).  Set
3160    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3161    memory allocation.  Likewise, specify preallocated storage for the
3162    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3163 
3164    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3165    the figure below we depict these three local rows and all columns (0-11).
3166 
3167 .vb
3168            0 1 2 3 4 5 6 7 8 9 10 11
3169           --------------------------
3170    row 3  |o o o d d d o o o o  o  o
3171    row 4  |o o o d d d o o o o  o  o
3172    row 5  |o o o d d d o o o o  o  o
3173           --------------------------
3174 .ve
3175 
3176    Thus, any entries in the d locations are stored in the d (diagonal)
3177    submatrix, and any entries in the o locations are stored in the
3178    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3179    stored simply in the MATSEQBAIJ format for compressed row storage.
3180 
3181    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3182    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3183    In general, for PDE problems in which most nonzeros are near the diagonal,
3184    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3185    or you will get TERRIBLE performance; see the users' manual chapter on
3186    matrices.
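
   For instance, if bs=3 in the figure above, the three depicted rows form one block row with a
   single diagonal block (columns 3-5) and three off-diagonal blocks (columns 0-2, 6-8 and 9-11),
   so d_nz=1 and o_nz=3 would be exact:
.vb
   ierr = MatMPIBAIJSetPreallocation(B,3,1,NULL,3,NULL);CHKERRQ(ierr);
.ve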
3187 
3188    You can call MatGetInfo() to get information on how effective the preallocation was;
3189    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3190    You can also run with the option -info and look for messages with the string
3191    malloc in them to see if additional memory allocation was needed.
3192 
3193    Level: intermediate
3194 
3195 .keywords: matrix, block, aij, compressed row, sparse, parallel
3196 
3197 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3198 @*/
3199 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3200 {
3201   PetscErrorCode ierr;
3202 
3203   PetscFunctionBegin;
3204   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3205   PetscValidType(B,1);
3206   PetscValidLogicalCollectiveInt(B,bs,2);
3207   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3208   PetscFunctionReturn(0);
3209 }
3210 
3211 /*@C
3212    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3213    (block compressed row).  For good matrix assembly performance
3214    the user should preallocate the matrix storage by setting the parameters
3215    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3216    performance can be increased by more than a factor of 50.
3217 
3218    Collective on MPI_Comm
3219 
3220    Input Parameters:
3221 +  comm - MPI communicator
3222 .  bs   - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
3223           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3224 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3225            This value should be the same as the local size used in creating the
3226            y vector for the matrix-vector product y = Ax.
3227 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3228            This value should be the same as the local size used in creating the
3229            x vector for the matrix-vector product y = Ax.
3230 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3231 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3232 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3233            submatrix  (same for all local rows)
3234 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3235            in the diagonal portion of the local submatrix (possibly different for each block
3236            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3237            and set it even if it is zero.
3238 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3239            submatrix (same for all local rows).
3240 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3241            off-diagonal portion of the local submatrix (possibly different for
3242            each block row) or NULL.
3243 
3244    Output Parameter:
3245 .  A - the matrix
3246 
3247    Options Database Keys:
3248 +   -mat_block_size - size of the blocks to use
3249 -   -mat_use_hash_table <fact> - use a hash table to save time in constructing the matrix, with size factor <fact>
3250 
3251    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3252    MatXXXXSetPreallocation() paradigm instead of this routine directly, as sketched below.
3253    [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation]
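
   A sketch of that paradigm (sizes and preallocation counts here are illustrative; calling both
   preallocation routines is safe, since each is ignored when the type does not match):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
   ierr = MatSeqBAIJSetPreallocation(A,bs,d_nz,NULL);CHKERRQ(ierr);
.ve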
3254 
3255    Notes:
3256    If the *_nnz parameter is given then the *_nz parameter is ignored
3257 
3258    A nonzero block is any block that has 1 or more nonzeros in it
3259 
3260    The user MUST specify either the local or global matrix dimensions
3261    (possibly both).
3262 
3263    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3264    then it must be used on all processors that share the object for that argument.
3265 
3266    Storage Information:
3267    For a square global matrix we define each processor's diagonal portion
3268    to be its local rows and the corresponding columns (a square submatrix);
3269    each processor's off-diagonal portion encompasses the remainder of the
3270    local matrix (a rectangular submatrix).
3271 
3272    The user can specify preallocated storage for the diagonal part of
3273    the local submatrix with either d_nz or d_nnz (not both).  Set
3274    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3275    memory allocation.  Likewise, specify preallocated storage for the
3276    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3277 
3278    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3279    the figure below we depict these three local rows and all columns (0-11).
3280 
3281 .vb
3282            0 1 2 3 4 5 6 7 8 9 10 11
3283           --------------------------
3284    row 3  |o o o d d d o o o o  o  o
3285    row 4  |o o o d d d o o o o  o  o
3286    row 5  |o o o d d d o o o o  o  o
3287           --------------------------
3288 .ve
3289 
3290    Thus, any entries in the d locations are stored in the d (diagonal)
3291    submatrix, and any entries in the o locations are stored in the
3292    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3293    stored simply in the MATSEQBAIJ format for compressed row storage.
3294 
3295    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3296    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3297    In general, for PDE problems in which most nonzeros are near the diagonal,
3298    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3299    or you will get TERRIBLE performance; see the users' manual chapter on
3300    matrices.
3301 
3302    Level: intermediate
3303 
3304 .keywords: matrix, block, aij, compressed row, sparse, parallel
3305 
3306 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3307 @*/
3308 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3309 {
3310   PetscErrorCode ierr;
3311   PetscMPIInt    size;
3312 
3313   PetscFunctionBegin;
3314   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3315   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3316   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3317   if (size > 1) {
3318     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3319     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3320   } else {
3321     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3322     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3323   }
3324   PetscFunctionReturn(0);
3325 }
3326 
3327 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3328 {
3329   Mat            mat;
3330   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3331   PetscErrorCode ierr;
3332   PetscInt       len=0;
3333 
3334   PetscFunctionBegin;
3335   *newmat = 0;
3336   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3337   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3338   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3339   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3340 
3341   mat->factortype   = matin->factortype;
3342   mat->preallocated = PETSC_TRUE;
3343   mat->assembled    = PETSC_TRUE;
3344   mat->insertmode   = NOT_SET_VALUES;
3345 
3346   a             = (Mat_MPIBAIJ*)mat->data;
3347   mat->rmap->bs = matin->rmap->bs;
3348   a->bs2        = oldmat->bs2;
3349   a->mbs        = oldmat->mbs;
3350   a->nbs        = oldmat->nbs;
3351   a->Mbs        = oldmat->Mbs;
3352   a->Nbs        = oldmat->Nbs;
3353 
3354   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3355   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3356 
3357   a->size         = oldmat->size;
3358   a->rank         = oldmat->rank;
3359   a->donotstash   = oldmat->donotstash;
3360   a->roworiented  = oldmat->roworiented;
3361   a->rowindices   = 0;
3362   a->rowvalues    = 0;
3363   a->getrowactive = PETSC_FALSE;
3364   a->barray       = 0;
3365   a->rstartbs     = oldmat->rstartbs;
3366   a->rendbs       = oldmat->rendbs;
3367   a->cstartbs     = oldmat->cstartbs;
3368   a->cendbs       = oldmat->cendbs;
3369 
3370   /* hash table stuff */
3371   a->ht           = 0;
3372   a->hd           = 0;
3373   a->ht_size      = 0;
3374   a->ht_flag      = oldmat->ht_flag;
3375   a->ht_fact      = oldmat->ht_fact;
3376   a->ht_total_ct  = 0;
3377   a->ht_insert_ct = 0;
3378 
3379   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3380   if (oldmat->colmap) {
3381 #if defined(PETSC_USE_CTABLE)
3382     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3383 #else
3384     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3385     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3386     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3387 #endif
3388   } else a->colmap = 0;
3389 
3390   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3391     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3392     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3393     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3394   } else a->garray = 0;
3395 
3396   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3397   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3398   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3399   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3400   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3401 
3402   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3403   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3404   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3405   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3406   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3407   *newmat = mat;
3408   PetscFunctionReturn(0);
3409 }
3410 
3411 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3412 {
3413   PetscErrorCode ierr;
3414   int            fd;
3415   PetscInt       i,nz,j,rstart,rend;
3416   PetscScalar    *vals,*buf;
3417   MPI_Comm       comm;
3418   MPI_Status     status;
3419   PetscMPIInt    rank,size,maxnz;
3420   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3421   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3422   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3423   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3424   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3425   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3426   PetscBool      isbinary;
3427 
3428   PetscFunctionBegin;
3429   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3430   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3431 
3432   /* force binary viewer to load .info file if it has not yet done so */
3433   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3434   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3435   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3436   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3437   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3438   if (bs < 0) bs = 1;
3439 
3440   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3441   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3442   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3443   if (!rank) {
3444     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3445     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3446     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3447   }
3448   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3449   M    = header[1]; N = header[2];
3450 
3451   /* If global sizes are set, check if they are consistent with that given in the file */
3452   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
3453   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
3454 
3455   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3456 
3457   /*
3458      This code adds extra rows to make sure the number of rows is
3459      divisible by the blocksize
3460   */
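  /* e.g. M = 10 with bs = 4 is padded to 12 rows: Mbs becomes 3 and extra_rows = 2 */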
3461   Mbs        = M/bs;
3462   extra_rows = bs - M + bs*Mbs;
3463   if (extra_rows == bs) extra_rows = 0;
3464   else                  Mbs++;
3465   if (extra_rows && !rank) {
3466     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3467   }
3468 
3469   /* determine ownership of all rows */
3470   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3471     mbs = Mbs/size + ((Mbs % size) > rank);
3472     m   = mbs*bs;
3473   } else { /* User set */
3474     m   = newmat->rmap->n;
3475     mbs = m/bs;
3476   }
3477   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3478   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3479 
3480   /* process 0 needs enough room for process with most rows */
3481   if (!rank) {
3482     mmax = rowners[1];
3483     for (i=2; i<=size; i++) {
3484       mmax = PetscMax(mmax,rowners[i]);
3485     }
3486     mmax*=bs;
3487   } else mmax = -1;             /* unused, but compiler warns anyway */
3488 
3489   rowners[0] = 0;
3490   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3491   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3492   rstart = rowners[rank];
3493   rend   = rowners[rank+1];
3494 
3495   /* distribute row lengths to all processors */
3496   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3497   if (!rank) {
3498     mend = m;
3499     if (size == 1) mend = mend - extra_rows;
3500     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3501     for (j=mend; j<m; j++) locrowlens[j] = 1;
3502     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3503     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3504     for (j=0; j<m; j++) {
3505       procsnz[0] += locrowlens[j];
3506     }
3507     for (i=1; i<size; i++) {
3508       mend = browners[i+1] - browners[i];
3509       if (i == size-1) mend = mend - extra_rows;
3510       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3511       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3512       /* calculate the number of nonzeros on each processor */
3513       for (j=0; j<browners[i+1]-browners[i]; j++) {
3514         procsnz[i] += rowlengths[j];
3515       }
3516       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3517     }
3518     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3519   } else {
3520     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3521   }
3522 
3523   if (!rank) {
3524     /* determine max buffer needed and allocate it */
3525     maxnz = procsnz[0];
3526     for (i=1; i<size; i++) {
3527       maxnz = PetscMax(maxnz,procsnz[i]);
3528     }
3529     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3530 
3531     /* read in my part of the matrix column indices  */
3532     nz     = procsnz[0];
3533     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3534     mycols = ibuf;
3535     if (size == 1) nz -= extra_rows;
3536     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3537     if (size == 1) {
3538       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3539     }
3540 
3541     /* read in the column indices for all other processes (except the last) and ship them off */
3542     for (i=1; i<size-1; i++) {
3543       nz   = procsnz[i];
3544       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3545       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3546     }
3547     /* read in the stuff for the last proc */
3548     if (size != 1) {
3549       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3550       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3551       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3552       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3553     }
3554     ierr = PetscFree(cols);CHKERRQ(ierr);
3555   } else {
3556     /* determine buffer space needed for message */
3557     nz = 0;
3558     for (i=0; i<m; i++) {
3559       nz += locrowlens[i];
3560     }
3561     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3562     mycols = ibuf;
3563     /* receive message of column indices */
3564     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3565     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3566     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3567   }
3568 
3569   /* loop over local rows, determining number of off diagonal entries */
3570   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3571   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
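  /* mask[tmp] flags block columns already counted for the current block row; masked1/masked2
     record which diagonal/off-diagonal flags to clear before the next block row */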
3572   rowcount = 0; nzcount = 0;
3573   for (i=0; i<mbs; i++) {
3574     dcount  = 0;
3575     odcount = 0;
3576     for (j=0; j<bs; j++) {
3577       kmax = locrowlens[rowcount];
3578       for (k=0; k<kmax; k++) {
3579         tmp = mycols[nzcount++]/bs;
3580         if (!mask[tmp]) {
3581           mask[tmp] = 1;
3582           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3583           else masked1[dcount++] = tmp;
3584         }
3585       }
3586       rowcount++;
3587     }
3588 
3589     dlens[i]  = dcount;
3590     odlens[i] = odcount;
3591 
3592     /* zero out the mask elements we set */
3593     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3594     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3595   }
3596 
3597   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3598   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3599 
3600   if (!rank) {
3601     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3602     /* read in my part of the matrix numerical values  */
3603     nz     = procsnz[0];
3604     vals   = buf;
3605     mycols = ibuf;
3606     if (size == 1) nz -= extra_rows;
3607     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3608     if (size == 1) {
3609       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3610     }
3611 
3612     /* insert into matrix */
3613     jj = rstart*bs;
3614     for (i=0; i<m; i++) {
3615       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3616       mycols += locrowlens[i];
3617       vals   += locrowlens[i];
3618       jj++;
3619     }
3620     /* read in other processors (except the last one) and ship out */
3621     for (i=1; i<size-1; i++) {
3622       nz   = procsnz[i];
3623       vals = buf;
3624       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3625       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3626     }
3627     /* the last proc */
3628     if (size != 1) {
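      /* i == size-1 here, left over from the loop above */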
3629       nz   = procsnz[i] - extra_rows;
3630       vals = buf;
3631       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3632       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3633       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3634     }
3635     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3636   } else {
3637     /* receive numeric values */
3638     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3639 
3640     /* receive message of values */
3641     vals   = buf;
3642     mycols = ibuf;
3643     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3644 
3645     /* insert into matrix */
3646     jj = rstart*bs;
3647     for (i=0; i<m; i++) {
3648       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3649       mycols += locrowlens[i];
3650       vals   += locrowlens[i];
3651       jj++;
3652     }
3653   }
3654   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3655   ierr = PetscFree(buf);CHKERRQ(ierr);
3656   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3657   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3658   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3659   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3660   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3661   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3662   PetscFunctionReturn(0);
3663 }
3664 
3665 /*@
3666    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3667 
3668    Not Collective, each process can use a different factor
3669 
3670    Input Parameters:
3671 +  mat  - the matrix
3672 -  fact - factor
3673 
3674    Level: advanced
3675 
3676   Notes:
3677    This can also be set by the command line option: -mat_use_hash_table <fact>
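
   A sketch of a typical pairing (1.39 is merely the fallback factor the option handler applies,
   not a recommendation):
.vb
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.39);CHKERRQ(ierr);
.ve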
3678 
3679 .keywords: matrix, hashtable, factor, HT
3680 
3681 .seealso: MatSetOption()
3682 @*/
3683 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3684 {
3685   PetscErrorCode ierr;
3686 
3687   PetscFunctionBegin;
3688   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3689   PetscFunctionReturn(0);
3690 }
3691 
3692 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3693 {
3694   Mat_MPIBAIJ *baij;
3695 
3696   PetscFunctionBegin;
3697   baij          = (Mat_MPIBAIJ*)mat->data;
3698   baij->ht_fact = fact;
3699   PetscFunctionReturn(0);
3700 }
3701 
3702 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3703 {
3704   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3705   PetscBool      flg;
3706   PetscErrorCode ierr;
3707 
3708   PetscFunctionBegin;
3709   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3710   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3711   if (Ad)     *Ad     = a->A;
3712   if (Ao)     *Ao     = a->B;
3713   if (colmap) *colmap = a->garray;
3714   PetscFunctionReturn(0);
3715 }
3716 
3717 /*
3718     Special version for direct calls from Fortran (to eliminate two function call overheads
3719 */
3720 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3721 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3722 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3723 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3724 #endif
3725 
3726 /*@C
3727   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3728 
3729   Collective on Mat
3730 
3731   Input Parameters:
3732 + mat - the matrix
3733 . min - number of input rows
3734 . im - input rows
3735 . nin - number of input columns
3736 . in - input columns
3737 . v - numerical values input
3738 - addvin - INSERT_VALUES or ADD_VALUES
3739 
3740   Notes:
3741     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3742 
3743   Level: advanced
3744 
3745 .seealso:   MatSetValuesBlocked()
3746 @*/
3747 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3748 {
3749   /* convert input arguments to C version */
3750   Mat        mat  = *matin;
3751   PetscInt   m    = *min, n = *nin;
3752   InsertMode addv = *addvin;
3753 
3754   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3755   const MatScalar *value;
3756   MatScalar       *barray     = baij->barray;
3757   PetscBool       roworiented = baij->roworiented;
3758   PetscErrorCode  ierr;
3759   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3760   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3761   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3762 
3763   PetscFunctionBegin;
3764   /* tasks normally handled by MatSetValuesBlocked() */
3765   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3766 #if defined(PETSC_USE_DEBUG)
3767   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3768   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3769 #endif
3770   if (mat->assembled) {
3771     mat->was_assembled = PETSC_TRUE;
3772     mat->assembled     = PETSC_FALSE;
3773   }
3774   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3775 
3776 
3777   if (!barray) {
3778     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3779     baij->barray = barray;
3780   }
3781 
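  /* stepval = stride (in scalars) from the end of one bs-wide row segment of a block in v to
     the start of that block's next row segment */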
3782   if (roworiented) stepval = (n-1)*bs;
3783   else stepval = (m-1)*bs;
3784 
3785   for (i=0; i<m; i++) {
3786     if (im[i] < 0) continue;
3787 #if defined(PETSC_USE_DEBUG)
3788     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3789 #endif
3790     if (im[i] >= rstart && im[i] < rend) {
3791       row = im[i] - rstart;
3792       for (j=0; j<n; j++) {
3793         /* If NumCol = 1 then a copy is not required */
3794         if ((roworiented) && (n == 1)) {
3795           barray = (MatScalar*)v + i*bs2;
3796         } else if ((!roworiented) && (m == 1)) {
3797           barray = (MatScalar*)v + j*bs2;
3798         } else { /* Here a copy is required */
3799           if (roworiented) {
3800             value = v + i*(stepval+bs)*bs + j*bs;
3801           } else {
3802             value = v + j*(stepval+bs)*bs + i*bs;
3803           }
3804           for (ii=0; ii<bs; ii++,value+=stepval) {
3805             for (jj=0; jj<bs; jj++) {
3806               *barray++ = *value++;
3807             }
3808           }
3809           barray -=bs2;
3810         }
3811 
3812         if (in[j] >= cstart && in[j] < cend) {
3813           col  = in[j] - cstart;
3814           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3815         } else if (in[j] < 0) continue;
3816 #if defined(PETSC_USE_DEBUG)
3817         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3818 #endif
3819         else {
3820           if (mat->was_assembled) {
3821             if (!baij->colmap) {
3822               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3823             }
3824 
3825 #if defined(PETSC_USE_DEBUG)
3826 #if defined(PETSC_USE_CTABLE)
3827             { PetscInt data;
3828               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3829               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3830             }
3831 #else
3832             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3833 #endif
3834 #endif
3835 #if defined(PETSC_USE_CTABLE)
3836             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3837             col  = (col - 1)/bs;
3838 #else
3839             col = (baij->colmap[in[j]] - 1)/bs;
3840 #endif
3841             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3842               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3843               col  =  in[j];
3844             }
3845           } else col = in[j];
3846           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3847         }
3848       }
3849     } else {
3850       if (!baij->donotstash) {
3851         if (roworiented) {
3852           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3853         } else {
3854           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3855         }
3856       }
3857     }
3858   }
3859 
3860   /* task normally handled by MatSetValuesBlocked() */
3861   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3862   PetscFunctionReturn(0);
3863 }
3864 
3865 /*@
3866      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
3867          in standard block CSR format.
3868 
3869    Collective on MPI_Comm
3870 
3871    Input Parameters:
3872 +  comm - MPI communicator
3873 .  bs - the block size; only a block size of 1 is supported
3874 .  m - number of local rows (Cannot be PETSC_DECIDE)
3875 .  n - This value should be the same as the local size used in creating the
3876        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3877        calculated if N is given) For square matrices n is almost always m.
3878 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3879 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3880 .   i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of blocks in block row row-1 of the matrix
3881 .   j - column indices
3882 -   a - matrix values
3883 
3884    Output Parameter:
3885 .   mat - the matrix
3886 
3887    Level: intermediate
3888 
3889    Notes:
3890        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3891      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3892      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3893 
3894      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3895      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3896      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3897      with column-major ordering within blocks.
3898 
3899        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
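
     For example (an illustrative sketch, not taken from the PETSc manual pages), with bs = 2 and one local
     block row that has blocks in block columns 0 and 2,
.vb
        block col 0    block col 2
         [ 1  3 ]       [ 5  7 ]
         [ 2  4 ]       [ 6  8 ]
.ve
     one would pass i = {0,2}, j = {0,2}, and a = {1,2,3,4,5,6,7,8}, since each block is stored
     column by column.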

.keywords: matrix, baij, block, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
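
/*
   A minimal usage sketch for MatCreateMPIBAIJWithArrays() (illustrative only, not part of the library;
   it assumes a single MPI rank, so the local arrays describe the whole matrix, and it respects the
   bs = 1 restriction documented above):

     PetscInt    i[] = {0,2,3};         two local rows, with 2 and 1 block entries
     PetscInt    j[] = {0,1,1};         global column indices
     PetscScalar a[] = {1.0,2.0,3.0};   one value per block since bs = 1
     Mat         A;

     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,2,i,j,a,&A);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/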
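
/*
   MatCreateMPIMatConcatenateSeqMat_MPIBAIJ - Builds the parallel BAIJ matrix obtained by stacking, in
   process order, the sequential BAIJ matrices supplied by the individual MPI processes. For
   MAT_INITIAL_MATRIX a symbolic phase first counts the diagonal and off-diagonal block nonzeros of each
   local block row so the result can be preallocated; the numeric phase then inserts the values row by
   row. For MAT_REUSE_MATRIX only the numeric phase is performed.
*/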
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt       *dnz,*onz,mbs,Nbs,nbs;
    PetscInt       *bindx,rmax=a->rmax,j;
    PetscMPIInt    rank,size;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      nbs  = n;
      ierr = PetscSplitOwnership(comm,&nbs,&Nbs);CHKERRQ(ierr);
      n    = nbs*cbs;
    } else {
      nbs = n/cbs;
    }

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */

    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
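
/*
   A hedged usage sketch (illustrative, not part of this file): this routine is normally reached through
   the type-independent front end MatCreateMPIMatConcatenateSeqMat(), with each rank supplying its own
   SeqBAIJ matrix seqA that shares the same global column layout:

     Mat seqA,A;
     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
*/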