xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e5a36eccef3d6b83a2c625c30d0dfd5adb4001f2)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode ierr;
15   PetscInt       i,*idxb = 0;
16   PetscScalar    *va,*vb;
17   Vec            vtmp;
18 
19   PetscFunctionBegin;
20   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
21   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
22   if (idx) {
23     for (i=0; i<A->rmap->n; i++) {
24       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
25     }
26   }
27 
28   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
29   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
30   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
31   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
32 
33   for (i=0; i<A->rmap->n; i++) {
34     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
35       va[i] = vb[i];
36       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
37     }
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non scalable way since the
73    length of colmap equals the global matrix length.
74 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data; /* off-diagonal block */
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;     /* nbs: number of block columns of B */

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* Scalable variant: hash table keyed by (global block column + 1); the +1
     keeps 0 free as the "not present" sentinel.  The stored value is the local
     point-column offset + 1. */
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* Non-scalable variant: dense array indexed by global block column; entry is
     the local point-column offset + 1, 0 meaning "column not present in B". */
  ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
96 
/*
   MatSetValues_SeqBAIJ_A_Private - Inserts or adds a single scalar entry into
   the DIAGONAL block A.  (row,col) are local point indices; (orow,ocol) are the
   original global indices, used only in error messages.  Expands to code that
   uses the macro-captured locals of MatSetValues_MPIBAIJ (a, aa, ai, aj, aimax,
   ailen, bs, bs2, rp, ap, bap, low, high, t, _i, ii, N, nrow, rmax, brow, bcol,
   ridx, cidx, ierr).  Does a short binary search narrowing followed by a linear
   scan over the block row; if the block column is absent and insertion is
   permitted (a->nonew), the row is reallocated/shifted and a fresh zeroed block
   is created before the scalar is stored.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
135 
/*
   MatSetValues_SeqBAIJ_B_Private - Same as MatSetValues_SeqBAIJ_A_Private but
   for the OFF-DIAGONAL block B (uses the b/ba/bi/bj/bimax/bilen macro-captured
   locals and the b_noinsert label).  (row,col) are local point indices into B;
   (orow,ocol) are the original global indices for error messages.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
173 
/*
   MatSetValues_MPIBAIJ - Inserts or adds a logically dense m-by-n array of
   scalars v at the global point indices (im[], in[]).  Locally owned rows are
   routed to the diagonal block A (local columns) or the off-diagonal block B
   (remote columns, via colmap); rows owned by other processes are placed in the
   stash for communication during assembly.  Negative row/column indices are
   silently skipped.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  /* scratch variables shared with the MatSetValues_SeqBAIJ_{A,B}_Private macros */
  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diagonal block */
          if (mat->was_assembled) {
            /* translate the global column to B's local numbering via colmap */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* column not yet in B: disassemble so B reverts to global numbering */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs; /* add the point offset inside the block */
          } else col = in[j]; /* before first assembly B uses global column numbering */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
261 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - Inserts or adds one bs-by-bs dense
   block v at local block position (row,col) of a sequential BAIJ matrix.
   (orow,ocol) are the original global block indices, used only in error
   messages.  v is read row-major when a->roworiented, column-major otherwise;
   blocks are stored internally in column-major order.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];      /* block column indices of this block row */
  ap   = aa + bs2*ai[row];  /* values of this block row */
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  low = 0;
  high = nrow;
  /* binary-search narrowing, then linear scan for block column 'col' */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block exists: combine v into it per InsertMode and orientation */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* v is row-major; storage is column-major, hence the strided jj loop */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* v is column-major, matching the storage layout */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently drop new blocks */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  /* copy v into the freshly created block */
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
355 
356 /*
357     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
358     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
359 */
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray; /* reusable bs2 scratch block */
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the scratch block used when v must be repacked */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows are skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the packed block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else { /* off-diagonal block */
          if (mat->was_assembled) {
            /* translate global block column to B's local numbering via colmap */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs; /* colmap stores point offsets; convert to block offset */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* column not yet in B: revert B to global numbering */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j]; /* before first assembly B uses global numbering */
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else { /* off-process block row: stash for assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
456 
/* Multiplicative hashing with Knuth's constant (the golden-ratio conjugate):
   HASH maps 'key' into [0,size) using the fractional part of key*HASH_KEY.
   'tmp' is a caller-supplied PetscReal scratch variable (the macro assigns it). */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues for MPIBAIJ.
   Requires the hash table (baij->ht/hd) to already be set up; every inserted
   entry must hash to an existing nonzero block or an error is raised.  Probing
   is linear with wrap-around from h1 through the whole table.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value; /* HD[idx] points at the bs2 values of the hashed block */
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; /* probe statistics */
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned point row */
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table: key encodes (global block row, global block column) */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe from h1 to end, then wrap from 0 back to h1 */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash for assembly */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
531 
532 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
533 {
534   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
535   PetscBool         roworiented = baij->roworiented;
536   PetscErrorCode    ierr;
537   PetscInt          i,j,ii,jj,row,col;
538   PetscInt          rstart=baij->rstartbs;
539   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
540   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
541   PetscReal         tmp;
542   MatScalar         **HD = baij->hd,*baij_a;
543   const PetscScalar *v_t,*value;
544 #if defined(PETSC_USE_DEBUG)
545   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
546 #endif
547 
548   PetscFunctionBegin;
549   if (roworiented) stepval = (n-1)*bs;
550   else stepval = (m-1)*bs;
551 
552   for (i=0; i<m; i++) {
553 #if defined(PETSC_USE_DEBUG)
554     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
555     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
556 #endif
557     row = im[i];
558     v_t = v + i*nbs2;
559     if (row >= rstart && row < rend) {
560       for (j=0; j<n; j++) {
561         col = in[j];
562 
563         /* Look up into the Hash Table */
564         key = row*Nbs+col+1;
565         h1  = HASH(size,key,tmp);
566 
567         idx = h1;
568 #if defined(PETSC_USE_DEBUG)
569         total_ct++;
570         insert_ct++;
571         if (HT[idx] != key) {
572           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
573           if (idx == size) {
574             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
575             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
576           }
577         }
578 #else
579         if (HT[idx] != key) {
580           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
581           if (idx == size) {
582             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
583             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
584           }
585         }
586 #endif
587         baij_a = HD[idx];
588         if (roworiented) {
589           /*value = v + i*(stepval+bs)*bs + j*bs;*/
590           /* value = v + (i*(stepval+bs)+j)*bs; */
591           value = v_t;
592           v_t  += bs;
593           if (addv == ADD_VALUES) {
594             for (ii=0; ii<bs; ii++,value+=stepval) {
595               for (jj=ii; jj<bs2; jj+=bs) {
596                 baij_a[jj] += *value++;
597               }
598             }
599           } else {
600             for (ii=0; ii<bs; ii++,value+=stepval) {
601               for (jj=ii; jj<bs2; jj+=bs) {
602                 baij_a[jj] = *value++;
603               }
604             }
605           }
606         } else {
607           value = v + j*(stepval+bs)*bs + i*bs;
608           if (addv == ADD_VALUES) {
609             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
610               for (jj=0; jj<bs; jj++) {
611                 baij_a[jj] += *value++;
612               }
613             }
614           } else {
615             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
616               for (jj=0; jj<bs; jj++) {
617                 baij_a[jj] = *value++;
618               }
619             }
620           }
621         }
622       }
623     } else {
624       if (!baij->donotstash) {
625         if (roworiented) {
626           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
627         } else {
628           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
629         }
630       }
631     }
632   }
633 #if defined(PETSC_USE_DEBUG)
634   baij->ht_total_ct  += total_ct;
635   baij->ht_insert_ct += insert_ct;
636 #endif
637   PetscFunctionReturn(0);
638 }
639 
/*
   MatGetValues_MPIBAIJ - Retrieves an m-by-n array of values at the global
   point indices (idxm[], idxn[]) into v (row-major).  Only locally owned rows
   are supported; columns not present in the off-diagonal block read as 0.0.
   Negative indices are skipped (their v entries are left untouched).
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) { /* locally owned row */
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) { /* diagonal block */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else { /* off-diagonal block: translate via colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--; /* colmap stores offset + 1; 0 means absent */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* column absent (or colmap stale for this block): value is structurally zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* add point offset within the block */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
680 
681 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
682 {
683   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
684   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
685   PetscErrorCode ierr;
686   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
687   PetscReal      sum = 0.0;
688   MatScalar      *v;
689 
690   PetscFunctionBegin;
691   if (baij->size == 1) {
692     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
693   } else {
694     if (type == NORM_FROBENIUS) {
695       v  = amat->a;
696       nz = amat->nz*bs2;
697       for (i=0; i<nz; i++) {
698         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
699       }
700       v  = bmat->a;
701       nz = bmat->nz*bs2;
702       for (i=0; i<nz; i++) {
703         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
704       }
705       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
706       *nrm = PetscSqrtReal(*nrm);
707     } else if (type == NORM_1) { /* max column sum */
708       PetscReal *tmp,*tmp2;
709       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
710       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
711       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
712       v    = amat->a; jj = amat->j;
713       for (i=0; i<amat->nz; i++) {
714         for (j=0; j<bs; j++) {
715           col = bs*(cstart + *jj) + j; /* column index */
716           for (row=0; row<bs; row++) {
717             tmp[col] += PetscAbsScalar(*v);  v++;
718           }
719         }
720         jj++;
721       }
722       v = bmat->a; jj = bmat->j;
723       for (i=0; i<bmat->nz; i++) {
724         for (j=0; j<bs; j++) {
725           col = bs*garray[*jj] + j;
726           for (row=0; row<bs; row++) {
727             tmp[col] += PetscAbsScalar(*v); v++;
728           }
729         }
730         jj++;
731       }
732       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
733       *nrm = 0.0;
734       for (j=0; j<mat->cmap->N; j++) {
735         if (tmp2[j] > *nrm) *nrm = tmp2[j];
736       }
737       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
738     } else if (type == NORM_INFINITY) { /* max row sum */
739       PetscReal *sums;
740       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
741       sum  = 0.0;
742       for (j=0; j<amat->mbs; j++) {
743         for (row=0; row<bs; row++) sums[row] = 0.0;
744         v  = amat->a + bs2*amat->i[j];
745         nz = amat->i[j+1]-amat->i[j];
746         for (i=0; i<nz; i++) {
747           for (col=0; col<bs; col++) {
748             for (row=0; row<bs; row++) {
749               sums[row] += PetscAbsScalar(*v); v++;
750             }
751           }
752         }
753         v  = bmat->a + bs2*bmat->i[j];
754         nz = bmat->i[j+1]-bmat->i[j];
755         for (i=0; i<nz; i++) {
756           for (col=0; col<bs; col++) {
757             for (row=0; row<bs; row++) {
758               sums[row] += PetscAbsScalar(*v); v++;
759             }
760           }
761         }
762         for (row=0; row<bs; row++) {
763           if (sums[row] > sum) sum = sums[row];
764         }
765       }
766       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
767       ierr = PetscFree(sums);CHKERRQ(ierr);
768     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
769   }
770   PetscFunctionReturn(0);
771 }
772 
773 /*
774   Creates the hash table, and sets the table
775   This table is created only once.
776   If new entried need to be added to the matrix
777   then the hash table has to be destroyed and
778   recreated.
779 */
780 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
781 {
782   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
783   Mat            A     = baij->A,B=baij->B;
784   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
785   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
786   PetscErrorCode ierr;
787   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
788   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
789   PetscInt       *HT,key;
790   MatScalar      **HD;
791   PetscReal      tmp;
792 #if defined(PETSC_USE_INFO)
793   PetscInt ct=0,max=0;
794 #endif
795 
796   PetscFunctionBegin;
797   if (baij->ht) PetscFunctionReturn(0);
798 
799   baij->ht_size = (PetscInt)(factor*nz);
800   ht_size       = baij->ht_size;
801 
802   /* Allocate Memory for Hash Table */
803   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
804   HD   = baij->hd;
805   HT   = baij->ht;
806 
807   /* Loop Over A */
808   for (i=0; i<a->mbs; i++) {
809     for (j=ai[i]; j<ai[i+1]; j++) {
810       row = i+rstart;
811       col = aj[j]+cstart;
812 
813       key = row*Nbs + col + 1;
814       h1  = HASH(ht_size,key,tmp);
815       for (k=0; k<ht_size; k++) {
816         if (!HT[(h1+k)%ht_size]) {
817           HT[(h1+k)%ht_size] = key;
818           HD[(h1+k)%ht_size] = a->a + j*bs2;
819           break;
820 #if defined(PETSC_USE_INFO)
821         } else {
822           ct++;
823 #endif
824         }
825       }
826 #if defined(PETSC_USE_INFO)
827       if (k> max) max = k;
828 #endif
829     }
830   }
831   /* Loop Over B */
832   for (i=0; i<b->mbs; i++) {
833     for (j=bi[i]; j<bi[i+1]; j++) {
834       row = i+rstart;
835       col = garray[bj[j]];
836       key = row*Nbs + col + 1;
837       h1  = HASH(ht_size,key,tmp);
838       for (k=0; k<ht_size; k++) {
839         if (!HT[(h1+k)%ht_size]) {
840           HT[(h1+k)%ht_size] = key;
841           HD[(h1+k)%ht_size] = b->a + j*bs2;
842           break;
843 #if defined(PETSC_USE_INFO)
844         } else {
845           ct++;
846 #endif
847         }
848       }
849 #if defined(PETSC_USE_INFO)
850       if (k> max) max = k;
851 #endif
852     }
853   }
854 
855   /* Print Summary */
856 #if defined(PETSC_USE_INFO)
857   for (i=0,j=0; i<ht_size; i++) {
858     if (HT[i]) j++;
859   }
860   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
861 #endif
862   PetscFunctionReturn(0);
863 }
864 
865 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
866 {
867   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
868   PetscErrorCode ierr;
869   PetscInt       nstash,reallocs;
870 
871   PetscFunctionBegin;
872   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
873 
874   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
875   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
876   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
877   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
878   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
879   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each message holds (row,col,val) triples from other ranks */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    /* drain the block stash: same row-grouping scheme, but each value slot is a bs2 block */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  /* assemble the diagonal block first; B may still be modified below */
  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is true only if EVERY rank was assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* on first final assembly, build the scatter used by matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  /* report and reset the hash-table probe statistics gathered during MatSetValues */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if hash-table insertion was requested, build the table now and switch the SetValues ops */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow() work space; it is reallocated on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
996 
997 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
998 #include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process summary: local sizes, nonzero counts of the diagonal and off-diagonal parts */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* skip drawing entirely when the draw context is a null device */
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns all rows of the gathered copy; every other rank owns none */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global (point) row indices of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;  /* global point column of the block's first column */
        for (k=0; k<bs; k++) {
          /* insert one bs-long column of the block at a time */
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;  /* off-diagonal columns map to global through garray */
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* the gathered matrix lives entirely in its diagonal block on rank 0 */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1116 
1117 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1118 {
1119   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1120   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1121   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1122   PetscErrorCode ierr;
1123   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1124   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1125   int            fd;
1126   PetscScalar    *column_values;
1127   FILE           *file;
1128   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1129   PetscInt       message_count,flowcontrolcount;
1130 
1131   PetscFunctionBegin;
1132   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1133   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1134   nz   = bs2*(A->nz + B->nz);
1135   rlen = mat->rmap->n;
1136   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1137   if (!rank) {
1138     header[0] = MAT_FILE_CLASSID;
1139     header[1] = mat->rmap->N;
1140     header[2] = mat->cmap->N;
1141 
1142     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1143     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1144     /* get largest number of rows any processor has */
1145     range = mat->rmap->range;
1146     for (i=1; i<size; i++) {
1147       rlen = PetscMax(rlen,range[i+1] - range[i]);
1148     }
1149   } else {
1150     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1151   }
1152 
1153   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1154   /* compute lengths of each row  */
1155   for (i=0; i<a->mbs; i++) {
1156     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1157   }
1158   /* store the row lengths to the file */
1159   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1160   if (!rank) {
1161     MPI_Status status;
1162     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1163     rlen = (range[1] - range[0])/bs;
1164     for (i=0; i<rlen; i++) {
1165       for (j=0; j<bs; j++) {
1166         row_lens[i*bs+j] = bs*crow_lens[i];
1167       }
1168     }
1169     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1170     for (i=1; i<size; i++) {
1171       rlen = (range[i+1] - range[i])/bs;
1172       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1173       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1174       for (k=0; k<rlen; k++) {
1175         for (j=0; j<bs; j++) {
1176           row_lens[k*bs+j] = bs*crow_lens[k];
1177         }
1178       }
1179       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     }
1181     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1182     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1183   } else {
1184     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1185     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1186     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1187   }
1188   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1189 
1190   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1191      information needed to make it for each row from a block row. This does require more communication but still not more than
1192      the communication needed for the nonzero values  */
1193   nzmax = nz; /*  space a largest processor needs */
1194   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1195   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1196   cnt   = 0;
1197   for (i=0; i<a->mbs; i++) {
1198     pcnt = cnt;
1199     for (j=B->i[i]; j<B->i[i+1]; j++) {
1200       if ((col = garray[B->j[j]]) > cstart) break;
1201       for (l=0; l<bs; l++) {
1202         column_indices[cnt++] = bs*col+l;
1203       }
1204     }
1205     for (k=A->i[i]; k<A->i[i+1]; k++) {
1206       for (l=0; l<bs; l++) {
1207         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1208       }
1209     }
1210     for (; j<B->i[i+1]; j++) {
1211       for (l=0; l<bs; l++) {
1212         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1213       }
1214     }
1215     len = cnt - pcnt;
1216     for (k=1; k<bs; k++) {
1217       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1218       cnt += len;
1219     }
1220   }
1221   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1222 
1223   /* store the columns to the file */
1224   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1225   if (!rank) {
1226     MPI_Status status;
1227     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1228     for (i=1; i<size; i++) {
1229       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1230       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1231       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1232       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1233     }
1234     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1235   } else {
1236     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1237     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1238     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1240   }
1241   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1242 
1243   /* load up the numerical values */
1244   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1245   cnt  = 0;
1246   for (i=0; i<a->mbs; i++) {
1247     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1248     for (j=B->i[i]; j<B->i[i+1]; j++) {
1249       if (garray[B->j[j]] > cstart) break;
1250       for (l=0; l<bs; l++) {
1251         for (ll=0; ll<bs; ll++) {
1252           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1253         }
1254       }
1255       cnt += bs;
1256     }
1257     for (k=A->i[i]; k<A->i[i+1]; k++) {
1258       for (l=0; l<bs; l++) {
1259         for (ll=0; ll<bs; ll++) {
1260           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1261         }
1262       }
1263       cnt += bs;
1264     }
1265     for (; j<B->i[i+1]; j++) {
1266       for (l=0; l<bs; l++) {
1267         for (ll=0; ll<bs; ll++) {
1268           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1269         }
1270       }
1271       cnt += bs;
1272     }
1273     cnt += (bs-1)*rlen;
1274   }
1275   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1276 
1277   /* store the column values to the file */
1278   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1279   if (!rank) {
1280     MPI_Status status;
1281     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1282     for (i=1; i<size; i++) {
1283       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1284       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1285       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1286       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1287     }
1288     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1289   } else {
1290     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1291     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1292     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1293     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1294   }
1295   ierr = PetscFree(column_values);CHKERRQ(ierr);
1296 
1297   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1298   if (file) {
1299     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1305 {
1306   PetscErrorCode ierr;
1307   PetscBool      iascii,isdraw,issocket,isbinary;
1308 
1309   PetscFunctionBegin;
1310   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1311   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1312   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1313   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1314   if (iascii || isdraw || issocket) {
1315     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1316   } else if (isbinary) {
1317     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1323 {
1324   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328 #if defined(PETSC_USE_LOG)
1329   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1330 #endif
1331   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1332   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1333   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1334   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1335 #if defined(PETSC_USE_CTABLE)
1336   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1337 #else
1338   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1339 #endif
1340   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1341   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1342   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1343   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1344   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1345   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1346   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1347   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1348 
1349   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1350   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1351   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1352   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1353   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1354   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1355   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1356   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1357   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1358 #if defined(PETSC_HAVE_HYPRE)
1359   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
1360 #endif
1361   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
1362   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1367 {
1368   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1369   PetscErrorCode ierr;
1370   PetscInt       nt;
1371 
1372   PetscFunctionBegin;
1373   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1374   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1375   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1376   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1377   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1378   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1379   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1380   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1385 {
1386   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1387   PetscErrorCode ierr;
1388 
1389   PetscFunctionBegin;
1390   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1391   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1392   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1393   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   /* do nondiagonal part */
1404   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1405   /* do local part */
1406   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1407   /* add partial results together */
1408   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1409   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1414 {
1415   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   /* do nondiagonal part */
1420   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1421   /* do local part */
1422   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1423   /* add partial results together */
1424   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1425   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 /*
1430   This only works correctly for square matrices where the subblock A->A is the
1431    diagonal block
1432 */
1433 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1434 {
1435   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1436   PetscErrorCode ierr;
1437 
1438   PetscFunctionBegin;
1439   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1440   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1445 {
1446   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1447   PetscErrorCode ierr;
1448 
1449   PetscFunctionBegin;
1450   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1451   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  /* only rows owned by this process can be extracted */
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* tmp = number of nonzero blocks in block row i (A part + B part) */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;  /* local (point) row index */

  /* request from A/B only what the caller asked for (NULL suppresses the output) */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;  /* maps local block columns of B to global block columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = first entry of B whose global block column is >= cstart;
         B entries before imark precede A's columns, the rest follow them */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already found while copying the values above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested, so locate imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        /* A's point columns are offsets from the block-column start cstart */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1531 
1532 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1533 {
1534   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1535 
1536   PetscFunctionBegin;
1537   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1538   baij->getrowactive = PETSC_FALSE;
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1543 {
1544   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1545   PetscErrorCode ierr;
1546 
1547   PetscFunctionBegin;
1548   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1549   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1554 {
1555   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1556   Mat            A  = a->A,B = a->B;
1557   PetscErrorCode ierr;
1558   PetscReal      isend[5],irecv[5];
1559 
1560   PetscFunctionBegin;
1561   info->block_size = (PetscReal)matin->rmap->bs;
1562 
1563   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1564 
1565   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1566   isend[3] = info->memory;  isend[4] = info->mallocs;
1567 
1568   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1569 
1570   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1571   isend[3] += info->memory;  isend[4] += info->mallocs;
1572 
1573   if (flag == MAT_LOCAL) {
1574     info->nz_used      = isend[0];
1575     info->nz_allocated = isend[1];
1576     info->nz_unneeded  = isend[2];
1577     info->memory       = isend[3];
1578     info->mallocs      = isend[4];
1579   } else if (flag == MAT_GLOBAL_MAX) {
1580     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1581 
1582     info->nz_used      = irecv[0];
1583     info->nz_allocated = irecv[1];
1584     info->nz_unneeded  = irecv[2];
1585     info->memory       = irecv[3];
1586     info->mallocs      = irecv[4];
1587   } else if (flag == MAT_GLOBAL_SUM) {
1588     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1589 
1590     info->nz_used      = irecv[0];
1591     info->nz_allocated = irecv[1];
1592     info->nz_unneeded  = irecv[2];
1593     info->memory       = irecv[3];
1594     info->mallocs      = irecv[4];
1595   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1596   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1597   info->fill_ratio_needed = 0;
1598   info->factor_mallocs    = 0;
1599   PetscFunctionReturn(0);
1600 }
1601 
/*
   MatSetOption_MPIBAIJ - Sets a matrix option, forwarding it to the local
   diagonal block (a->A) and/or off-diagonal block (a->B) as appropriate, or
   recording it in the parallel layer's own state.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* nonzero-pattern options apply to both local blocks */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* remembered locally (used by MatSetValues) and forwarded to both blocks */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* parallel-layer-only flag: do not stash entries destined for other ranks */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* hard-wired hash table sizing factor */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    /* symmetry-type flags are only forwarded to the diagonal block */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1648 
/*
   MatTranspose_MPIBAIJ - Forms the transpose of the parallel BAIJ matrix A.

   For MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX a new matrix B of transposed
   sizes is created (without preallocation information); for MAT_REUSE_MATRIX
   the caller-supplied *matout is filled.  Each bs x bs block of A is inserted
   into B column-by-column, which performs the transpose since the MatScalar
   storage of a block is walked in strides of bs.  For MAT_INPLACE_MATRIX the
   result is merged back into A via MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    /* rvals = the bs consecutive global (point) row numbers of block row i */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* global point column of the first column of this block */
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert one column of the block as a row of B: the transpose */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* garray translates the local block column to a global block column */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* MAT_INPLACE_MATRIX: fold the assembled transpose back into A */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1712 
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll) * mat * diag(rr).

   Either vector may be NULL to skip that side.  The right vector rr must be
   scattered to the ghost layout (baij->lvec) before it can scale the
   off-diagonal block; that scatter is started first so the left scaling and
   the diagonal-block scaling overlap the communication.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block with the local part of ll */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1743 
1744 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1745 {
1746   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1747   PetscInt      *lrows;
1748   PetscInt       r, len;
1749   PetscBool      cong;
1750   PetscErrorCode ierr;
1751 
1752   PetscFunctionBegin;
1753   /* get locally owned rows */
1754   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1755   /* fix right hand side if needed */
1756   if (x && b) {
1757     const PetscScalar *xx;
1758     PetscScalar       *bb;
1759 
1760     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1761     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1762     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1763     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1764     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1765   }
1766 
1767   /* actually zap the local rows */
1768   /*
1769         Zero the required rows. If the "diagonal block" of the matrix
1770      is square and the user wishes to set the diagonal we use separate
1771      code so that MatSetValues() is not called for each diagonal allocating
1772      new memory, thus calling lots of mallocs and slowing things down.
1773 
1774   */
1775   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1776   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1777   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1778   if ((diag != 0.0) && cong) {
1779     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1780   } else if (diag != 0.0) {
1781     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1782     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1783        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1784     for (r = 0; r < len; ++r) {
1785       const PetscInt row = lrows[r] + A->rmap->rstart;
1786       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1787     }
1788     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1789     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1790   } else {
1791     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1792   }
1793   ierr = PetscFree(lrows);CHKERRQ(ierr);
1794 
1795   /* only change matrix nonzero state if pattern was allowed to be changed */
1796   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1797     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1798     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1799   }
1800   PetscFunctionReturn(0);
1801 }
1802 
/*
   MatZeroRowsColumns_MPIBAIJ - Zeros the listed global rows AND the matching
   columns, optionally placing 'diag' on the diagonal; when x and b are given,
   b is adjusted so the solution retains the values of x in the zeroed
   rows/columns.

   Outline:
   1. A PetscSF maps the (arbitrarily distributed) requested rows onto their
      owning ranks, producing the local row list lrows.
   2. The diagonal block is handled by the sequential MatZeroRowsColumns().
   3. A 0/1 mask vector is scattered to the ghost layout to identify which
      off-process columns must be zeroed in the off-diagonal block, whose
      entries are then cleared directly in its BAIJ storage.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build a mask: 1 in every zeroed local row, then scatter it to the ghost
     (column) layout so each rank knows which of its ghost columns are zeroed */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost values of x are needed to correct b for the zeroed columns below */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* number of point entries in this point row = blocks in its block row * bs */
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    /* start of this point row within the block storage: stride bs between entries */
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          /* move the known value to the right-hand side before zeroing */
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1898 
1899 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1900 {
1901   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1910 
1911 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1912 {
1913   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1914   Mat            a,b,c,d;
1915   PetscBool      flg;
1916   PetscErrorCode ierr;
1917 
1918   PetscFunctionBegin;
1919   a = matA->A; b = matA->B;
1920   c = matB->A; d = matB->B;
1921 
1922   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1923   if (flg) {
1924     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1925   }
1926   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1931 {
1932   PetscErrorCode ierr;
1933   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1934   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1935 
1936   PetscFunctionBegin;
1937   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1938   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1939     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1940   } else {
1941     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1942     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1943   }
1944   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1949 {
1950   PetscErrorCode ierr;
1951 
1952   PetscFunctionBegin;
1953   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1954   PetscFunctionReturn(0);
1955 }
1956 
/*
   MatAXPYGetPreallocation_MPIBAIJ - Computes the per-block-row nonzero counts
   (nnz) required to preallocate the union pattern of Y and X.

   Y and X here are the sequential off-diagonal blocks of two parallel BAIJ
   matrices (see the caller in MatAXPY_MPIBAIJ); yltog/xltog are their garray
   local-to-global block-column maps, used so columns are compared in global
   numbering.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
  Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
  Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1968 
/*
   MatAXPY_MPIBAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN: the value arrays of the corresponding local blocks
   line up entry-for-entry, so a pair of BLAS axpy calls suffices.
   SUBSET_NONZERO_PATTERN: defer to the generic MatAXPY_Basic().
   Otherwise: build a new matrix B preallocated for the union pattern, compute
   the sum into it, and replace Y's content via MatHeaderReplace().
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: y->a += alpha * x->a over all nz*bs2 scalars */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    /* off-diagonal blocks */
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    /* preallocate B for the union of the nonzero patterns of X and Y */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2012 
2013 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2014 {
2015   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2020   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2025 {
2026   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2027   PetscErrorCode ierr;
2028 
2029   PetscFunctionBegin;
2030   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2031   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2036 {
2037   PetscErrorCode ierr;
2038   IS             iscol_local;
2039   PetscInt       csize;
2040 
2041   PetscFunctionBegin;
2042   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2043   if (call == MAT_REUSE_MATRIX) {
2044     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2045     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2046   } else {
2047     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2048   }
2049   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2050   if (call == MAT_INITIAL_MATRIX) {
2051     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2052     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2053   }
2054   PetscFunctionReturn(0);
2055 }
2056 
/*
  Not ideal: this makes two copies of the submatrix — first a sequential SeqBAIJ
  copy on each process, and then the final result obtained by concatenating the
  local matrices.  Writing it directly would look much like
  MatCreateSubMatrices_MPIBAIJ().
  This routine is used for both BAIJ and SBAIJ matrices (an unfortunate dependency).
*/
/*
   MatCreateSubMatrix_MPIBAIJ_Private - Extracts the parallel submatrix
   mat[isrow,iscol] with csize local columns.

   Steps:
   1. Compress the point-index sets to block-index sets.
   2. Gather the requested rows/columns into a sequential matrix Mreuse
      (created on the first call, recycled on MAT_REUSE_MATRIX).
   3. On the initial call, count diagonal/off-diagonal nonzeros per block row
      to preallocate the parallel result M.
   4. Insert Mreuse's rows into M with MatSetValuesBlocked_MPIBAIJ().

   Serves both BAIJ and SBAIJ (hence the two preallocation calls).  Mreuse is
   stashed on M under the key "SubMatrix" for subsequent reuse calls.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  /* gather the requested rows/columns into the sequential matrix Mreuse */
  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* convert point counts to block counts */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible across the ranks */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    /* both calls are made because this routine serves BAIJ and SBAIJ types */
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;             /* global block row in M */
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;          /* block column indices of this row */
    vwork = aa;     aa += nz*bs*bs;    /* the nz blocks of values */
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2187 
2188 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2189 {
2190   MPI_Comm       comm,pcomm;
2191   PetscInt       clocal_size,nrows;
2192   const PetscInt *rows;
2193   PetscMPIInt    size;
2194   IS             crowp,lcolp;
2195   PetscErrorCode ierr;
2196 
2197   PetscFunctionBegin;
2198   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2199   /* make a collective version of 'rowp' */
2200   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2201   if (pcomm==comm) {
2202     crowp = rowp;
2203   } else {
2204     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2205     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2206     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2207     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2208   }
2209   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2210   /* make a local version of 'colp' */
2211   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2212   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2213   if (size==1) {
2214     lcolp = colp;
2215   } else {
2216     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2217   }
2218   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2219   /* now we just get the submatrix */
2220   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2221   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2222   /* clean up */
2223   if (pcomm!=comm) {
2224     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2225   }
2226   if (size>1) {
2227     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2233 {
2234   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2235   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2236 
2237   PetscFunctionBegin;
2238   if (nghosts) *nghosts = B->nbs;
2239   if (ghosts) *ghosts = baij->garray;
2240   PetscFunctionReturn(0);
2241 }
2242 
/*
   MatGetSeqNonzeroStructure_MPIBAIJ - Builds, on every process, a sequential
   AIJ matrix containing the block nonzero structure of the entire parallel
   BAIJ matrix (one entry per bs x bs block; numerical values are NOT set).

   Collective: uses two MPI_Allgatherv calls, first for the per-block-row
   lengths and then for the column indices.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[] is indexed by GLOBAL block row; each process fills its own rows
     (diag + off-diag block counts), then everyone gathers the rest */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  /* one allocation serves both recvcounts and displs (displs aliases the
     second half); both are freed with the single PetscFree below */
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  /* write directly into B's j array at this rank's row offset so the
     in-place Allgatherv below can fill the remaining ranks' entries */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    /* off-diagonal columns below the diagonal block come first so each row
       ends up globally sorted: lower-B, then A, then upper-B */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  /* recvcounts/displs are reused, now in units of nonzeros instead of rows */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information so downstream users can exploit it */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2361 
/*
   MatSOR_MPIBAIJ - SOR/relaxation for parallel BAIJ matrices.

   Only the "local" sweep variants are supported: each outer iteration
   scatters the current solution into the ghost vector, computes the
   modified right-hand side bb1 = bb - B*x_ghost, and runs the requested
   sequential sweep on the diagonal block.  A true parallel SOR is not
   implemented and raises PETSC_ERR_SUP.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;   /* work vector for bb - B*x; allocated only when an off-diagonal update will occur */

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply only the local upper-triangular part; no communication needed */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed iff the while loops below execute at least once: either
     more than one outer iteration, or the ZERO_INITIAL_GUESS bit is NOT set
     (note: ~flag & bit is nonzero exactly when the bit is unset in flag) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess the ghost contribution B*x is zero, so the
         first iteration can use bb directly */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  /* VecDestroy is a no-op when bb1 was never allocated (still NULL) */
  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2432 
2433 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2434 {
2435   PetscErrorCode ierr;
2436   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2437   PetscInt       N,i,*garray = aij->garray;
2438   PetscInt       ib,jb,bs = A->rmap->bs;
2439   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2440   MatScalar      *a_val = a_aij->a;
2441   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2442   MatScalar      *b_val = b_aij->a;
2443   PetscReal      *work;
2444 
2445   PetscFunctionBegin;
2446   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2447   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2448   if (type == NORM_2) {
2449     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2450       for (jb=0; jb<bs; jb++) {
2451         for (ib=0; ib<bs; ib++) {
2452           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2453           a_val++;
2454         }
2455       }
2456     }
2457     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2458       for (jb=0; jb<bs; jb++) {
2459         for (ib=0; ib<bs; ib++) {
2460           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2461           b_val++;
2462         }
2463       }
2464     }
2465   } else if (type == NORM_1) {
2466     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2467       for (jb=0; jb<bs; jb++) {
2468         for (ib=0; ib<bs; ib++) {
2469           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2470           a_val++;
2471         }
2472       }
2473     }
2474     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2475       for (jb=0; jb<bs; jb++) {
2476        for (ib=0; ib<bs; ib++) {
2477           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2478           b_val++;
2479         }
2480       }
2481     }
2482   } else if (type == NORM_INFINITY) {
2483     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2484       for (jb=0; jb<bs; jb++) {
2485         for (ib=0; ib<bs; ib++) {
2486           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2487           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2488           a_val++;
2489         }
2490       }
2491     }
2492     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2493       for (jb=0; jb<bs; jb++) {
2494         for (ib=0; ib<bs; ib++) {
2495           int col = garray[b_aij->j[i]] * bs + jb;
2496           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2497           b_val++;
2498         }
2499       }
2500     }
2501   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2502   if (type == NORM_INFINITY) {
2503     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2504   } else {
2505     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2506   }
2507   ierr = PetscFree(work);CHKERRQ(ierr);
2508   if (type == NORM_2) {
2509     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2510   }
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2515 {
2516   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2517   PetscErrorCode ierr;
2518 
2519   PetscFunctionBegin;
2520   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2521   A->factorerrortype             = a->A->factorerrortype;
2522   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2523   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2528 {
2529   PetscErrorCode ierr;
2530   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2531   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2532 
2533   PetscFunctionBegin;
2534   if (!Y->preallocated) {
2535     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2536   } else if (!aij->nz) {
2537     PetscInt nonew = aij->nonew;
2538     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2539     aij->nonew = nonew;
2540   }
2541   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2546 {
2547   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2548   PetscErrorCode ierr;
2549 
2550   PetscFunctionBegin;
2551   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2552   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2553   if (d) {
2554     PetscInt rstart;
2555     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2556     *d += rstart/A->rmap->bs;
2557 
2558   }
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2563 {
2564   PetscFunctionBegin;
2565   *a = ((Mat_MPIBAIJ*)A->data)->A;
2566   PetscFunctionReturn(0);
2567 }
2568 
/* -------------------------------------------------------------------*/
/*
   MatOps_Values - Function-pointer table installed into B->ops by
   MatCreate_MPIBAIJ.  Entries are POSITIONAL (the slot index determines the
   operation, see struct _MatOps in the private Mat headers); the numeric
   comments mark every fifth slot.  A 0 entry means the operation is not
   implemented for MPIBAIJ.  Do not reorder.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ 0,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       0,
                                       MatZeroRowsColumns_MPIBAIJ,
                                /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       0,
                                       0,
                                /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*74*/ 0,
                                       MatFDColoringApply_BAIJ,
                                       0,
                                       0,
                                       0,
                                /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPIBAIJ,
                                /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*104*/0,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPIBAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
                                       0,
                                       MatGetGhosts_MPIBAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                /*124*/0,
                                       MatGetColumnNorms_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_Default,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
};
2716 
2717 
2718 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2719 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2720 
/*
   MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Preallocates and fills a parallel
   BAIJ matrix from local block-CSR arrays (ii,jj in BLOCK units, V optional
   block values).

   Two passes: first count diagonal/off-diagonal blocks per local block row
   and preallocate, then insert the values with MatSetValuesBlocked.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* fix the block size on both layouts before setting them up; bs is then
     re-read in case the layout adjusted it */
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all of the following are in block units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);   /* widest row, used to size the zero-fill buffer below */
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      /* classify each block column as off-diagonal (outside [cstart,cend)) or diagonal */
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* if no values were supplied, insert explicit zeros from a scratch buffer
     large enough for the widest row */
  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* the CSR structure is final: error on any attempt to add new nonzeros */
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2787 
2788 /*@C
2789    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2790    (the default parallel PETSc format).
2791 
2792    Collective on MPI_Comm
2793 
2794    Input Parameters:
2795 +  B - the matrix
2796 .  bs - the block size
2797 .  i - the indices into j for the start of each local row (starts with zero)
2798 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2799 -  v - optional values in the matrix
2800 
2801    Level: developer
2802 
2803    Notes:
2804     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2805    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2806    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2807    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2808    block column and the second index is over columns within a block.
2809 
.keywords: matrix, baij, compressed row, sparse, parallel
2811 
2812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2813 @*/
/* Public entry point: validates the arguments collectively, then dispatches
   to the type-specific implementation registered as
   "MatMPIBAIJSetPreallocationCSR_C" (a no-op for types that do not provide
   one, since PetscTryMethod is used rather than PetscUseMethod). */
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);   /* bs must match on all ranks */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2825 
/*
   MatMPIBAIJSetPreallocation_MPIBAIJ - Sets up the layouts, block-unit
   bookkeeping, and the two sequential parts (diagonal A, off-diagonal B) of
   an MPIBAIJ matrix, then preallocates both.

   Safe to call more than once: communication structures and the B part are
   destroyed and rebuilt, since repreallocation can change the off-diagonal
   structure.
*/
PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* re-read bs: the layout is the authoritative source after setup */
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  /* validate caller-supplied per-block-row counts */
  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  /* cache block-unit sizes and ownership ranges */
  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  /* discard any stale communication structures from a previous preallocation */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  /* the diagonal part and the block stash are created only once */
  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2895 
2896 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2897 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2898 
/*
   MatConvert_MPIBAIJ_MPIAdj - Builds the block-row adjacency graph (MPIAdj)
   of an MPIBAIJ matrix.  One graph vertex per block row; diagonal entries
   are removed, and each row's neighbors are emitted in globally sorted
   order (off-diagonal blocks left of the diagonal, then local blocks, then
   off-diagonal blocks to the right).  Ownership of ii and jj transfers to
   the MPIAdj matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: row pointer array; row length = diag + offdiag blocks,
     minus one if the row contains its diagonal block */
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* second pass: fill column indices in sorted global block numbering */
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    /* off-diagonal blocks with global column before this rank's rows */
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* local (diagonal-part) blocks, skipping the diagonal entry itself */
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    /* remaining off-diagonal blocks (j continues from the break above) */
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2938 
2939 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2940 
2941 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2942 
/*
   MatConvert_MPIBAIJ_MPIAIJ - Converts an assembled MPIBAIJ matrix to
   MPIAIJ by converting its two sequential BAIJ parts.

   Note: on the non-reuse paths A is temporarily disassembled (its
   off-diagonal part switched back to global numbering) and then reassembled,
   so A is modified during the conversion even for MAT_INITIAL_MATRIX.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create an empty MPIAIJ shell; its A/B parts are replaced below */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    /* disassemble so a->B uses global column numbering for the conversion;
       A is reassembled afterwards */
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's internals with B's; B itself is consumed */
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2987 
2988 /*MC
2989    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2990 
2991    Options Database Keys:
2992 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2993 . -mat_block_size <bs> - set the blocksize used to store the matrix
2994 - -mat_use_hash_table <fact>
2995 
2996   Level: beginner
2997 
2998 .seealso: MatCreateMPIBAIJ
2999 M*/
3000 
3001 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3002 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
3003 
/*
   MatCreate_MPIBAIJ - Constructor invoked by MatSetType(B,MATMPIBAIJ).

   Installs the MPIBAIJ operation table, allocates and zero-initializes the
   type-specific data structure, registers the type-specific methods
   (conversion, preallocation, hash-table factor, ...) on the object, and
   processes the -mat_use_hash_table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  /* install the MPIBAIJ virtual function table (defined earlier in this file) */
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships (one entry per process plus sentinel) */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache (stash) for off-process entries set during assembly */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff; the table itself is only built if MAT_USE_HASH_TABLE is set */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;


  /* register the type-specific methods looked up by name elsewhere (e.g. PetscTryMethod) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  /* -mat_use_hash_table [<fact>]: enable hash-table assembly, optionally with a factor */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    /* factors <= 1.0 would make the table too small; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3086 
3087 /*MC
3088    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3089 
3090    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3091    and MATMPIBAIJ otherwise.
3092 
3093    Options Database Keys:
3094 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3095 
3096   Level: beginner
3097 
3098 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3099 M*/
3100 
3101 /*@C
3102    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3103    (block compressed row).  For good matrix assembly performance
3104    the user should preallocate the matrix storage by setting the parameters
3105    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3106    performance can be increased by more than a factor of 50.
3107 
3108    Collective on Mat
3109 
3110    Input Parameters:
3111 +  B - the matrix
3112 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3113           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3114 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3115            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
3120 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3121            submatrix (same for all local rows).
3122 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3123            off-diagonal portion of the local submatrix (possibly different for
3124            each block row) or NULL.
3125 
3126    If the *_nnz parameter is given then the *_nz parameter is ignored
3127 
3128    Options Database Keys:
3129 +   -mat_block_size - size of the blocks to use
3130 -   -mat_use_hash_table <fact>
3131 
3132    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3135 
3136    Storage Information:
3137    For a square global matrix we define each processor's diagonal portion
3138    to be its local rows and the corresponding columns (a square submatrix);
3139    each processor's off-diagonal portion encompasses the remainder of the
3140    local matrix (a rectangular submatrix).
3141 
3142    The user can specify preallocated storage for the diagonal part of
3143    the local submatrix with either d_nz or d_nnz (not both).  Set
3144    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3145    memory allocation.  Likewise, specify preallocated storage for the
3146    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3147 
3148    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3149    the figure below we depict these three local rows and all columns (0-11).
3150 
3151 .vb
3152            0 1 2 3 4 5 6 7 8 9 10 11
3153           --------------------------
3154    row 3  |o o o d d d o o o o  o  o
3155    row 4  |o o o d d d o o o o  o  o
3156    row 5  |o o o d d d o o o o  o  o
3157           --------------------------
3158 .ve
3159 
3160    Thus, any entries in the d locations are stored in the d (diagonal)
3161    submatrix, and any entries in the o locations are stored in the
3162    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3163    stored simply in the MATSEQBAIJ format for compressed row storage.
3164 
3165    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3166    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3167    In general, for PDE problems in which most nonzeros are near the diagonal,
3168    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3169    or you will get TERRIBLE performance; see the users' manual chapter on
3170    matrices.
3171 
3172    You can call MatGetInfo() to get information on how effective the preallocation was;
3173    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3174    You can also run with the option -info and look for messages with the string
3175    malloc in them to see if additional memory allocation was needed.
3176 
3177    Level: intermediate
3178 
3179 .keywords: matrix, block, aij, compressed row, sparse, parallel
3180 
3181 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3182 @*/
/* Public entry point: validates arguments and dispatches to the type-specific
   implementation composed under "MatMPIBAIJSetPreallocation_C" (a no-op if the
   matrix type has not composed that method, per PetscTryMethod semantics). */
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* bs must be logically collective: the same value on every process of B's communicator */
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3194 
3195 /*@C
3196    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3197    (block compressed row).  For good matrix assembly performance
3198    the user should preallocate the matrix storage by setting the parameters
3199    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3200    performance can be increased by more than a factor of 50.
3201 
3202    Collective on MPI_Comm
3203 
3204    Input Parameters:
3205 +  comm - MPI communicator
3206 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3207           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3208 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3209            This value should be the same as the local size used in creating the
3210            y vector for the matrix-vector product y = Ax.
3211 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3212            This value should be the same as the local size used in creating the
3213            x vector for the matrix-vector product y = Ax.
3214 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3215 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3216 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3217            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3222 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3223            submatrix (same for all local rows).
3224 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3225            off-diagonal portion of the local submatrix (possibly different for
3226            each block row) or NULL.
3227 
3228    Output Parameter:
3229 .  A - the matrix
3230 
3231    Options Database Keys:
3232 +   -mat_block_size - size of the blocks to use
3233 -   -mat_use_hash_table <fact>
3234 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3238 
3239    Notes:
3240    If the *_nnz parameter is given then the *_nz parameter is ignored
3241 
   A nonzero block is any block that has 1 or more nonzeros in it
3243 
3244    The user MUST specify either the local or global matrix dimensions
3245    (possibly both).
3246 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3249 
3250    Storage Information:
3251    For a square global matrix we define each processor's diagonal portion
3252    to be its local rows and the corresponding columns (a square submatrix);
3253    each processor's off-diagonal portion encompasses the remainder of the
3254    local matrix (a rectangular submatrix).
3255 
3256    The user can specify preallocated storage for the diagonal part of
3257    the local submatrix with either d_nz or d_nnz (not both).  Set
3258    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3259    memory allocation.  Likewise, specify preallocated storage for the
3260    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3261 
3262    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3263    the figure below we depict these three local rows and all columns (0-11).
3264 
3265 .vb
3266            0 1 2 3 4 5 6 7 8 9 10 11
3267           --------------------------
3268    row 3  |o o o d d d o o o o  o  o
3269    row 4  |o o o d d d o o o o  o  o
3270    row 5  |o o o d d d o o o o  o  o
3271           --------------------------
3272 .ve
3273 
3274    Thus, any entries in the d locations are stored in the d (diagonal)
3275    submatrix, and any entries in the o locations are stored in the
3276    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3277    stored simply in the MATSEQBAIJ format for compressed row storage.
3278 
3279    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3280    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3281    In general, for PDE problems in which most nonzeros are near the diagonal,
3282    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3283    or you will get TERRIBLE performance; see the users' manual chapter on
3284    matrices.
3285 
3286    Level: intermediate
3287 
3288 .keywords: matrix, block, aij, compressed row, sparse, parallel
3289 
3290 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3291 @*/
3292 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3293 {
3294   PetscErrorCode ierr;
3295   PetscMPIInt    size;
3296 
3297   PetscFunctionBegin;
3298   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3299   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3300   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3301   if (size > 1) {
3302     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3303     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3304   } else {
3305     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3306     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3307   }
3308   PetscFunctionReturn(0);
3309 }
3310 
/*
   MatDuplicate_MPIBAIJ - Duplicates an assembled MPIBAIJ matrix, copying the
   layout, block-size bookkeeping, column map, scatter context, and (depending
   on cpvalues) the numerical values of the diagonal and off-diagonal parts.
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* the duplicate is marked assembled and ready for use */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size bookkeeping (counts in blocks: local/global rows and columns) */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share matin's layouts by reference (replaces the layouts created by MatSetSizes) */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-use scratch state starts fresh in the duplicate */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration but not the table itself */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  /* deep-copy the global-to-local column map (table or flat array, per build option) */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* deep-copy garray; its length is the number of block columns of the off-diagonal part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  /* fresh block stash; duplicate the multiply work vector and copy the scatter context */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* duplicate the local diagonal and off-diagonal matrices (values per cpvalues) */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3393 
3394 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3395 {
3396   PetscErrorCode ierr;
3397   int            fd;
3398   PetscInt       i,nz,j,rstart,rend;
3399   PetscScalar    *vals,*buf;
3400   MPI_Comm       comm;
3401   MPI_Status     status;
3402   PetscMPIInt    rank,size,maxnz;
3403   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3404   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3405   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3406   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3407   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3408   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3409   PetscBool      isbinary;
3410 
3411   PetscFunctionBegin;
3412   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3413   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3414 
3415   /* force binary viewer to load .info file if it has not yet done so */
3416   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3417   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3418   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3419   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3420   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3421   if (bs < 0) bs = 1;
3422 
3423   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3424   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3425   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3426   if (!rank) {
3427     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3428     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3429     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3430   }
3431   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3432   M    = header[1]; N = header[2];
3433 
3434   /* If global sizes are set, check if they are consistent with that given in the file */
3435   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
3436   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
3437 
3438   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3439 
3440   /*
3441      This code adds extra rows to make sure the number of rows is
3442      divisible by the blocksize
3443   */
3444   Mbs        = M/bs;
3445   extra_rows = bs - M + bs*Mbs;
3446   if (extra_rows == bs) extra_rows = 0;
3447   else                  Mbs++;
3448   if (extra_rows && !rank) {
3449     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3450   }
3451 
3452   /* determine ownership of all rows */
3453   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3454     mbs = Mbs/size + ((Mbs % size) > rank);
3455     m   = mbs*bs;
3456   } else { /* User set */
3457     m   = newmat->rmap->n;
3458     mbs = m/bs;
3459   }
3460   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3461   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3462 
3463   /* process 0 needs enough room for process with most rows */
3464   if (!rank) {
3465     mmax = rowners[1];
3466     for (i=2; i<=size; i++) {
3467       mmax = PetscMax(mmax,rowners[i]);
3468     }
3469     mmax*=bs;
3470   } else mmax = -1;             /* unused, but compiler warns anyway */
3471 
3472   rowners[0] = 0;
3473   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3474   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3475   rstart = rowners[rank];
3476   rend   = rowners[rank+1];
3477 
3478   /* distribute row lengths to all processors */
3479   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3480   if (!rank) {
3481     mend = m;
3482     if (size == 1) mend = mend - extra_rows;
3483     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3484     for (j=mend; j<m; j++) locrowlens[j] = 1;
3485     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3486     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3487     for (j=0; j<m; j++) {
3488       procsnz[0] += locrowlens[j];
3489     }
3490     for (i=1; i<size; i++) {
3491       mend = browners[i+1] - browners[i];
3492       if (i == size-1) mend = mend - extra_rows;
3493       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3494       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3495       /* calculate the number of nonzeros on each processor */
3496       for (j=0; j<browners[i+1]-browners[i]; j++) {
3497         procsnz[i] += rowlengths[j];
3498       }
3499       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3500     }
3501     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3502   } else {
3503     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3504   }
3505 
3506   if (!rank) {
3507     /* determine max buffer needed and allocate it */
3508     maxnz = procsnz[0];
3509     for (i=1; i<size; i++) {
3510       maxnz = PetscMax(maxnz,procsnz[i]);
3511     }
3512     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3513 
3514     /* read in my part of the matrix column indices  */
3515     nz     = procsnz[0];
3516     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3517     mycols = ibuf;
3518     if (size == 1) nz -= extra_rows;
3519     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3520     if (size == 1) {
3521       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3522     }
3523 
3524     /* read in every ones (except the last) and ship off */
3525     for (i=1; i<size-1; i++) {
3526       nz   = procsnz[i];
3527       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3528       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3529     }
3530     /* read in the stuff for the last proc */
3531     if (size != 1) {
3532       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3533       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3534       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3535       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3536     }
3537     ierr = PetscFree(cols);CHKERRQ(ierr);
3538   } else {
3539     /* determine buffer space needed for message */
3540     nz = 0;
3541     for (i=0; i<m; i++) {
3542       nz += locrowlens[i];
3543     }
3544     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3545     mycols = ibuf;
3546     /* receive message of column indices*/
3547     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3548     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3549     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3550   }
3551 
3552   /* loop over local rows, determining number of off diagonal entries */
3553   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3554   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3555   rowcount = 0; nzcount = 0;
3556   for (i=0; i<mbs; i++) {
3557     dcount  = 0;
3558     odcount = 0;
3559     for (j=0; j<bs; j++) {
3560       kmax = locrowlens[rowcount];
3561       for (k=0; k<kmax; k++) {
3562         tmp = mycols[nzcount++]/bs;
3563         if (!mask[tmp]) {
3564           mask[tmp] = 1;
3565           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3566           else masked1[dcount++] = tmp;
3567         }
3568       }
3569       rowcount++;
3570     }
3571 
3572     dlens[i]  = dcount;
3573     odlens[i] = odcount;
3574 
3575     /* zero out the mask elements we set */
3576     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3577     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3578   }
3579 
3580   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3581   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3582 
3583   if (!rank) {
3584     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3585     /* read in my part of the matrix numerical values  */
3586     nz     = procsnz[0];
3587     vals   = buf;
3588     mycols = ibuf;
3589     if (size == 1) nz -= extra_rows;
3590     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3591     if (size == 1) {
3592       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3593     }
3594 
3595     /* insert into matrix */
3596     jj = rstart*bs;
3597     for (i=0; i<m; i++) {
3598       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3599       mycols += locrowlens[i];
3600       vals   += locrowlens[i];
3601       jj++;
3602     }
3603     /* read in other processors (except the last one) and ship out */
3604     for (i=1; i<size-1; i++) {
3605       nz   = procsnz[i];
3606       vals = buf;
3607       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3608       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3609     }
3610     /* the last proc */
3611     if (size != 1) {
3612       nz   = procsnz[i] - extra_rows;
3613       vals = buf;
3614       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3615       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3616       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3617     }
3618     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3619   } else {
3620     /* receive numeric values */
3621     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3622 
3623     /* receive message of values*/
3624     vals   = buf;
3625     mycols = ibuf;
3626     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3627 
3628     /* insert into matrix */
3629     jj = rstart*bs;
3630     for (i=0; i<m; i++) {
3631       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3632       mycols += locrowlens[i];
3633       vals   += locrowlens[i];
3634       jj++;
3635     }
3636   }
3637   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3638   ierr = PetscFree(buf);CHKERRQ(ierr);
3639   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3640   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3641   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3642   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3643   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3644   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3645   PetscFunctionReturn(0);
3646 }
3647 
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

  Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (MatSetHashTableFactor_MPIBAIJ);
     PetscTryMethod is a no-op for matrix types that do not provide this method. */
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3674 
3675 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3676 {
3677   Mat_MPIBAIJ *baij;
3678 
3679   PetscFunctionBegin;
3680   baij          = (Mat_MPIBAIJ*)mat->data;
3681   baij->ht_fact = fact;
3682   PetscFunctionReturn(0);
3683 }
3684 
3685 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3686 {
3687   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3688   PetscBool      flg;
3689   PetscErrorCode ierr;
3690 
3691   PetscFunctionBegin;
3692   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3693   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3694   if (Ad)     *Ad     = a->A;
3695   if (Ao)     *Ao     = a->B;
3696   if (colmap) *colmap = a->garray;
3697   PetscFunctionReturn(0);
3698 }
3699 
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes:
    This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

  Level: advanced

.seealso:   MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);


  /* lazily allocate the bs2-sized scratch array used to hold one block while it
     is rearranged into the layout MatSetValuesBlocked_SeqBAIJ_Inlined() expects */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stepval is the distance (in scalars) between consecutive rows (row-oriented)
     or columns (column-oriented) of one block inside the caller's v[] layout */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block-row index means "skip this row" */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned by this process */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block (strided in v) contiguously into barray */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          /* rewind barray to the start of the freshly copied block */
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column falls in the "diagonal" part: insert into baij->A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue; /* negative block-column index means "skip" */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* off-diagonal part: translate global block column to baij->B's local
             column via the colmap (built on demand after a previous assembly) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* column not present in current nonzero pattern: fall back to the
               unassembled representation so the new block can be inserted */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash the values for communication
         during assembly (unless the user disabled stashing) */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3847 
3848 /*@
3849      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3850          CSR format the local rows.
3851 
3852    Collective on MPI_Comm
3853 
3854    Input Parameters:
3855 +  comm - MPI communicator
3856 .  bs - the block size, only a block size of 1 is supported
3857 .  m - number of local rows (Cannot be PETSC_DECIDE)
3858 .  n - This value should be the same as the local size used in creating the
3859        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3860        calculated if N is given) For square matrices n is almost always m.
3861 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3862 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3863 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3864 .   j - column indices
3865 -   a - matrix values
3866 
3867    Output Parameter:
3868 .   mat - the matrix
3869 
3870    Level: intermediate
3871 
3872    Notes:
3873        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3874      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3875      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3876 
3877      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3878      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3879      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3880      with column-major ordering within blocks.
3881 
3882        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3883 
3884 .keywords: matrix, aij, compressed row, sparse, parallel
3885 
3886 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3887           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3888 @*/
3889 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3890 {
3891   PetscErrorCode ierr;
3892 
3893   PetscFunctionBegin;
3894   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3895   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3896   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3897   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3898   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3899   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3900   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3901   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3902   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3903   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3904   PetscFunctionReturn(0);
3905 }
3906 
3907 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3908 {
3909   PetscErrorCode ierr;
3910   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3911   PetscInt       *indx;
3912   PetscScalar    *values;
3913 
3914   PetscFunctionBegin;
3915   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3916   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3917     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3918     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3919     PetscInt       *bindx,rmax=a->rmax,j;
3920     PetscMPIInt    rank,size;
3921 
3922     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3923     mbs = m/bs; Nbs = N/cbs;
3924     if (n == PETSC_DECIDE) {
3925       nbs  = n;
3926       ierr = PetscSplitOwnership(comm,&nbs,&Nbs);CHKERRQ(ierr);
3927       n    = nbs*cbs;
3928     } else {
3929       nbs = n/cbs;
3930     }
3931 
3932     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3933     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3934 
3935     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3936     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3937     if (rank == size-1) {
3938       /* Check sum(nbs) = Nbs */
3939       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3940     }
3941 
3942     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3943     for (i=0; i<mbs; i++) {
3944       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3945       nnz = nnz/bs;
3946       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3947       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3948       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3949     }
3950     ierr = PetscFree(bindx);CHKERRQ(ierr);
3951 
3952     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3953     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3954     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3955     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3956     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3957     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3958     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3959   }
3960 
3961   /* numeric phase */
3962   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3963   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3964 
3965   for (i=0; i<m; i++) {
3966     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3967     Ii   = i + rstart;
3968     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3969     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3970   }
3971   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3972   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3973   PetscFunctionReturn(0);
3974 }
3975