xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 58c0e5077dcf40d6a880c19c87f1075ae1d22c8e)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode ierr;
15   PetscInt       i,*idxb = 0;
16   PetscScalar    *va,*vb;
17   Vec            vtmp;
18 
19   PetscFunctionBegin;
20   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
21   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
22   if (idx) {
23     for (i=0; i<A->rmap->n; i++) {
24       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
25     }
26   }
27 
28   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
29   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
30   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
31   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
32 
33   for (i=0; i<A->rmap->n; i++) {
34     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
35       va[i] = vb[i];
36       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
37     }
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non scalable way since the
73    length of colmap equals the global matrix length.
74 */
75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
76 {
77   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
78   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
79   PetscErrorCode ierr;
80   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
81 
82   PetscFunctionBegin;
83 #if defined(PETSC_USE_CTABLE)
84   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
85   for (i=0; i<nbs; i++) {
86     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
87   }
88 #else
89   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
90   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
91   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
92 #endif
93   PetscFunctionReturn(0);
94 }
95 
96 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
97   { \
98     brow = row/bs;  \
99     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
100     rmax = aimax[brow]; nrow = ailen[brow]; \
101     bcol = col/bs; \
102     ridx = row % bs; cidx = col % bs; \
103     low  = 0; high = nrow; \
104     while (high-low > 3) { \
105       t = (low+high)/2; \
106       if (rp[t] > bcol) high = t; \
107       else              low  = t; \
108     } \
109     for (_i=low; _i<high; _i++) { \
110       if (rp[_i] > bcol) break; \
111       if (rp[_i] == bcol) { \
112         bap = ap +  bs2*_i + bs*cidx + ridx; \
113         if (addv == ADD_VALUES) *bap += value;  \
114         else                    *bap  = value;  \
115         goto a_noinsert; \
116       } \
117     } \
118     if (a->nonew == 1) goto a_noinsert; \
119     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
120     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
121     N = nrow++ - 1;  \
122     /* shift up all the later entries in this row */ \
123     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
124     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
125     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
126     rp[_i]                      = bcol;  \
127     ap[bs2*_i + bs*cidx + ridx] = value;  \
128 a_noinsert:; \
129     ailen[brow] = nrow; \
130   }
131 
132 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
133   { \
134     brow = row/bs;  \
135     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
136     rmax = bimax[brow]; nrow = bilen[brow]; \
137     bcol = col/bs; \
138     ridx = row % bs; cidx = col % bs; \
139     low  = 0; high = nrow; \
140     while (high-low > 3) { \
141       t = (low+high)/2; \
142       if (rp[t] > bcol) high = t; \
143       else              low  = t; \
144     } \
145     for (_i=low; _i<high; _i++) { \
146       if (rp[_i] > bcol) break; \
147       if (rp[_i] == bcol) { \
148         bap = ap +  bs2*_i + bs*cidx + ridx; \
149         if (addv == ADD_VALUES) *bap += value;  \
150         else                    *bap  = value;  \
151         goto b_noinsert; \
152       } \
153     } \
154     if (b->nonew == 1) goto b_noinsert; \
155     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
156     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
157     N = nrow++ - 1;  \
158     /* shift up all the later entries in this row */ \
159     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
160     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
161     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
162     rp[_i]                      = bcol;  \
163     ap[bs2*_i + bs*cidx + ridx] = value;  \
164 b_noinsert:; \
165     bilen[brow] = nrow; \
166   }
167 
168 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
169 {
170   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
171   MatScalar      value;
172   PetscBool      roworiented = baij->roworiented;
173   PetscErrorCode ierr;
174   PetscInt       i,j,row,col;
175   PetscInt       rstart_orig=mat->rmap->rstart;
176   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
177   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
178 
179   /* Some Variables required in the macro */
180   Mat         A     = baij->A;
181   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
182   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
183   MatScalar   *aa   =a->a;
184 
185   Mat         B     = baij->B;
186   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
187   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
188   MatScalar   *ba   =b->a;
189 
190   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
191   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
192   MatScalar *ap,*bap;
193 
194   PetscFunctionBegin;
195   for (i=0; i<m; i++) {
196     if (im[i] < 0) continue;
197 #if defined(PETSC_USE_DEBUG)
198     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
199 #endif
200     if (im[i] >= rstart_orig && im[i] < rend_orig) {
201       row = im[i] - rstart_orig;
202       for (j=0; j<n; j++) {
203         if (in[j] >= cstart_orig && in[j] < cend_orig) {
204           col = in[j] - cstart_orig;
205           if (roworiented) value = v[i*n+j];
206           else             value = v[i+j*m];
207           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
208         } else if (in[j] < 0) continue;
209 #if defined(PETSC_USE_DEBUG)
210         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
211 #endif
212         else {
213           if (mat->was_assembled) {
214             if (!baij->colmap) {
215               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
216             }
217 #if defined(PETSC_USE_CTABLE)
218             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
219             col  = col - 1;
220 #else
221             col = baij->colmap[in[j]/bs] - 1;
222 #endif
223             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
224               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
225               col  =  in[j];
226               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
227               B    = baij->B;
228               b    = (Mat_SeqBAIJ*)(B)->data;
229               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
230               ba   =b->a;
231             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
232             else col += in[j]%bs;
233           } else col = in[j];
234           if (roworiented) value = v[i*n+j];
235           else             value = v[i+j*m];
236           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
237           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
238         }
239       }
240     } else {
241       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
242       if (!baij->donotstash) {
243         mat->assembled = PETSC_FALSE;
244         if (roworiented) {
245           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
246         } else {
247           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
248         }
249       }
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
255 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
256 {
257   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
258   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
259   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
260   PetscErrorCode    ierr;
261   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
262   PetscBool         roworiented=a->roworiented;
263   const PetscScalar *value     = v;
264   MatScalar         *ap,*aa = a->a,*bap;
265 
266   PetscFunctionBegin;
267   rp   = aj + ai[row];
268   ap   = aa + bs2*ai[row];
269   rmax = imax[row];
270   nrow = ailen[row];
271   value = v;
272   low = 0;
273   high = nrow;
274   while (high-low > 7) {
275     t = (low+high)/2;
276     if (rp[t] > col) high = t;
277     else             low  = t;
278   }
279   for (i=low; i<high; i++) {
280     if (rp[i] > col) break;
281     if (rp[i] == col) {
282       bap = ap +  bs2*i;
283       if (roworiented) {
284         if (is == ADD_VALUES) {
285           for (ii=0; ii<bs; ii++) {
286             for (jj=ii; jj<bs2; jj+=bs) {
287               bap[jj] += *value++;
288             }
289           }
290         } else {
291           for (ii=0; ii<bs; ii++) {
292             for (jj=ii; jj<bs2; jj+=bs) {
293               bap[jj] = *value++;
294             }
295           }
296         }
297       } else {
298         if (is == ADD_VALUES) {
299           for (ii=0; ii<bs; ii++,value+=bs) {
300             for (jj=0; jj<bs; jj++) {
301               bap[jj] += value[jj];
302             }
303             bap += bs;
304           }
305         } else {
306           for (ii=0; ii<bs; ii++,value+=bs) {
307             for (jj=0; jj<bs; jj++) {
308               bap[jj]  = value[jj];
309             }
310             bap += bs;
311           }
312         }
313       }
314       goto noinsert2;
315     }
316   }
317   if (nonew == 1) goto noinsert2;
318   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
319   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
320   N = nrow++ - 1; high++;
321   /* shift up all the later entries in this row */
322   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
323   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
324   rp[i] = col;
325   bap   = ap +  bs2*i;
326   if (roworiented) {
327     for (ii=0; ii<bs; ii++) {
328       for (jj=ii; jj<bs2; jj+=bs) {
329         bap[jj] = *value++;
330       }
331     }
332   } else {
333     for (ii=0; ii<bs; ii++) {
334       for (jj=0; jj<bs; jj++) {
335         *bap++ = *value++;
336       }
337     }
338   }
339   noinsert2:;
340   ailen[row] = nrow;
341   PetscFunctionReturn(0);
342 }
343 
344 /*
345     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
346     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
347 */
348 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
349 {
350   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
351   const PetscScalar *value;
352   MatScalar         *barray     = baij->barray;
353   PetscBool         roworiented = baij->roworiented;
354   PetscErrorCode    ierr;
355   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
356   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
357   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
358 
359   PetscFunctionBegin;
360   if (!barray) {
361     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
362     baij->barray = barray;
363   }
364 
365   if (roworiented) stepval = (n-1)*bs;
366   else stepval = (m-1)*bs;
367 
368   for (i=0; i<m; i++) {
369     if (im[i] < 0) continue;
370 #if defined(PETSC_USE_DEBUG)
371     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
372 #endif
373     if (im[i] >= rstart && im[i] < rend) {
374       row = im[i] - rstart;
375       for (j=0; j<n; j++) {
376         /* If NumCol = 1 then a copy is not required */
377         if ((roworiented) && (n == 1)) {
378           barray = (MatScalar*)v + i*bs2;
379         } else if ((!roworiented) && (m == 1)) {
380           barray = (MatScalar*)v + j*bs2;
381         } else { /* Here a copy is required */
382           if (roworiented) {
383             value = v + (i*(stepval+bs) + j)*bs;
384           } else {
385             value = v + (j*(stepval+bs) + i)*bs;
386           }
387           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
388             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
389             barray += bs;
390           }
391           barray -= bs2;
392         }
393 
394         if (in[j] >= cstart && in[j] < cend) {
395           col  = in[j] - cstart;
396           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
397         } else if (in[j] < 0) continue;
398 #if defined(PETSC_USE_DEBUG)
399         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
400 #endif
401         else {
402           if (mat->was_assembled) {
403             if (!baij->colmap) {
404               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
405             }
406 
407 #if defined(PETSC_USE_DEBUG)
408 #if defined(PETSC_USE_CTABLE)
409             { PetscInt data;
410               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
411               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
412             }
413 #else
414             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
415 #endif
416 #endif
417 #if defined(PETSC_USE_CTABLE)
418             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
419             col  = (col - 1)/bs;
420 #else
421             col = (baij->colmap[in[j]] - 1)/bs;
422 #endif
423             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
424               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
425               col  =  in[j];
426             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
427           } else col = in[j];
428           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
429         }
430       }
431     } else {
432       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
433       if (!baij->donotstash) {
434         if (roworiented) {
435           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
436         } else {
437           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
438         }
439       }
440     }
441   }
442   PetscFunctionReturn(0);
443 }
444 
445 #define HASH_KEY 0.6180339887
446 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
447 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
448 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
449 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
450 {
451   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
452   PetscBool      roworiented = baij->roworiented;
453   PetscErrorCode ierr;
454   PetscInt       i,j,row,col;
455   PetscInt       rstart_orig=mat->rmap->rstart;
456   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
457   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
458   PetscReal      tmp;
459   MatScalar      **HD = baij->hd,value;
460 #if defined(PETSC_USE_DEBUG)
461   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
462 #endif
463 
464   PetscFunctionBegin;
465   for (i=0; i<m; i++) {
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
468     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
469 #endif
470     row = im[i];
471     if (row >= rstart_orig && row < rend_orig) {
472       for (j=0; j<n; j++) {
473         col = in[j];
474         if (roworiented) value = v[i*n+j];
475         else             value = v[i+j*m];
476         /* Look up PetscInto the Hash Table */
477         key = (row/bs)*Nbs+(col/bs)+1;
478         h1  = HASH(size,key,tmp);
479 
480 
481         idx = h1;
482 #if defined(PETSC_USE_DEBUG)
483         insert_ct++;
484         total_ct++;
485         if (HT[idx] != key) {
486           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
487           if (idx == size) {
488             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
489             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
490           }
491         }
492 #else
493         if (HT[idx] != key) {
494           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
495           if (idx == size) {
496             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
497             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498           }
499         }
500 #endif
501         /* A HASH table entry is found, so insert the values at the correct address */
502         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
503         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
504       }
505     } else if (!baij->donotstash) {
506       if (roworiented) {
507         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
508       } else {
509         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
510       }
511     }
512   }
513 #if defined(PETSC_USE_DEBUG)
514   baij->ht_total_ct  += total_ct;
515   baij->ht_insert_ct += insert_ct;
516 #endif
517   PetscFunctionReturn(0);
518 }
519 
520 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
521 {
522   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
523   PetscBool         roworiented = baij->roworiented;
524   PetscErrorCode    ierr;
525   PetscInt          i,j,ii,jj,row,col;
526   PetscInt          rstart=baij->rstartbs;
527   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
528   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
529   PetscReal         tmp;
530   MatScalar         **HD = baij->hd,*baij_a;
531   const PetscScalar *v_t,*value;
532 #if defined(PETSC_USE_DEBUG)
533   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
534 #endif
535 
536   PetscFunctionBegin;
537   if (roworiented) stepval = (n-1)*bs;
538   else stepval = (m-1)*bs;
539 
540   for (i=0; i<m; i++) {
541 #if defined(PETSC_USE_DEBUG)
542     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
543     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
544 #endif
545     row = im[i];
546     v_t = v + i*nbs2;
547     if (row >= rstart && row < rend) {
548       for (j=0; j<n; j++) {
549         col = in[j];
550 
551         /* Look up into the Hash Table */
552         key = row*Nbs+col+1;
553         h1  = HASH(size,key,tmp);
554 
555         idx = h1;
556 #if defined(PETSC_USE_DEBUG)
557         total_ct++;
558         insert_ct++;
559         if (HT[idx] != key) {
560           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
561           if (idx == size) {
562             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
563             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
564           }
565         }
566 #else
567         if (HT[idx] != key) {
568           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
569           if (idx == size) {
570             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
571             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
572           }
573         }
574 #endif
575         baij_a = HD[idx];
576         if (roworiented) {
577           /*value = v + i*(stepval+bs)*bs + j*bs;*/
578           /* value = v + (i*(stepval+bs)+j)*bs; */
579           value = v_t;
580           v_t  += bs;
581           if (addv == ADD_VALUES) {
582             for (ii=0; ii<bs; ii++,value+=stepval) {
583               for (jj=ii; jj<bs2; jj+=bs) {
584                 baij_a[jj] += *value++;
585               }
586             }
587           } else {
588             for (ii=0; ii<bs; ii++,value+=stepval) {
589               for (jj=ii; jj<bs2; jj+=bs) {
590                 baij_a[jj] = *value++;
591               }
592             }
593           }
594         } else {
595           value = v + j*(stepval+bs)*bs + i*bs;
596           if (addv == ADD_VALUES) {
597             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
598               for (jj=0; jj<bs; jj++) {
599                 baij_a[jj] += *value++;
600               }
601             }
602           } else {
603             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
604               for (jj=0; jj<bs; jj++) {
605                 baij_a[jj] = *value++;
606               }
607             }
608           }
609         }
610       }
611     } else {
612       if (!baij->donotstash) {
613         if (roworiented) {
614           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
615         } else {
616           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
617         }
618       }
619     }
620   }
621 #if defined(PETSC_USE_DEBUG)
622   baij->ht_total_ct  += total_ct;
623   baij->ht_insert_ct += insert_ct;
624 #endif
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
633   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
640       row = idxm[i] - bsrstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= bscstart && idxn[j] < bscend) {
645           col  = idxn[j] - bscstart;
646           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!baij->colmap) {
649             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
653           data--;
654 #else
655           data = baij->colmap[idxn[j]/bs]-1;
656 #endif
657           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
658           else {
659             col  = data + idxn[j]%bs;
660             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
661           }
662         }
663       }
664     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
670 {
671   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
672   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
673   PetscErrorCode ierr;
674   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
675   PetscReal      sum = 0.0;
676   MatScalar      *v;
677 
678   PetscFunctionBegin;
679   if (baij->size == 1) {
680     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
681   } else {
682     if (type == NORM_FROBENIUS) {
683       v  = amat->a;
684       nz = amat->nz*bs2;
685       for (i=0; i<nz; i++) {
686         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
687       }
688       v  = bmat->a;
689       nz = bmat->nz*bs2;
690       for (i=0; i<nz; i++) {
691         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
692       }
693       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
694       *nrm = PetscSqrtReal(*nrm);
695     } else if (type == NORM_1) { /* max column sum */
696       PetscReal *tmp,*tmp2;
697       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
698       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
699       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
700       v    = amat->a; jj = amat->j;
701       for (i=0; i<amat->nz; i++) {
702         for (j=0; j<bs; j++) {
703           col = bs*(cstart + *jj) + j; /* column index */
704           for (row=0; row<bs; row++) {
705             tmp[col] += PetscAbsScalar(*v);  v++;
706           }
707         }
708         jj++;
709       }
710       v = bmat->a; jj = bmat->j;
711       for (i=0; i<bmat->nz; i++) {
712         for (j=0; j<bs; j++) {
713           col = bs*garray[*jj] + j;
714           for (row=0; row<bs; row++) {
715             tmp[col] += PetscAbsScalar(*v); v++;
716           }
717         }
718         jj++;
719       }
720       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
721       *nrm = 0.0;
722       for (j=0; j<mat->cmap->N; j++) {
723         if (tmp2[j] > *nrm) *nrm = tmp2[j];
724       }
725       ierr = PetscFree(tmp);CHKERRQ(ierr);
726       ierr = PetscFree(tmp2);CHKERRQ(ierr);
727     } else if (type == NORM_INFINITY) { /* max row sum */
728       PetscReal *sums;
729       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
730       sum  = 0.0;
731       for (j=0; j<amat->mbs; j++) {
732         for (row=0; row<bs; row++) sums[row] = 0.0;
733         v  = amat->a + bs2*amat->i[j];
734         nz = amat->i[j+1]-amat->i[j];
735         for (i=0; i<nz; i++) {
736           for (col=0; col<bs; col++) {
737             for (row=0; row<bs; row++) {
738               sums[row] += PetscAbsScalar(*v); v++;
739             }
740           }
741         }
742         v  = bmat->a + bs2*bmat->i[j];
743         nz = bmat->i[j+1]-bmat->i[j];
744         for (i=0; i<nz; i++) {
745           for (col=0; col<bs; col++) {
746             for (row=0; row<bs; row++) {
747               sums[row] += PetscAbsScalar(*v); v++;
748             }
749           }
750         }
751         for (row=0; row<bs; row++) {
752           if (sums[row] > sum) sum = sums[row];
753         }
754       }
755       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
756       ierr = PetscFree(sums);CHKERRQ(ierr);
757     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 /*
763   Creates the hash table, and sets the table
764   This table is created only once.
765   If new entried need to be added to the matrix
766   then the hash table has to be destroyed and
767   recreated.
768 */
769 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
770 {
771   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
772   Mat            A     = baij->A,B=baij->B;
773   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
774   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
775   PetscErrorCode ierr;
776   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
777   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
778   PetscInt       *HT,key;
779   MatScalar      **HD;
780   PetscReal      tmp;
781 #if defined(PETSC_USE_INFO)
782   PetscInt ct=0,max=0;
783 #endif
784 
785   PetscFunctionBegin;
786   if (baij->ht) PetscFunctionReturn(0);
787 
788   baij->ht_size = (PetscInt)(factor*nz);
789   ht_size       = baij->ht_size;
790 
791   /* Allocate Memory for Hash Table */
792   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
793   HD   = baij->hd;
794   HT   = baij->ht;
795 
796   /* Loop Over A */
797   for (i=0; i<a->mbs; i++) {
798     for (j=ai[i]; j<ai[i+1]; j++) {
799       row = i+rstart;
800       col = aj[j]+cstart;
801 
802       key = row*Nbs + col + 1;
803       h1  = HASH(ht_size,key,tmp);
804       for (k=0; k<ht_size; k++) {
805         if (!HT[(h1+k)%ht_size]) {
806           HT[(h1+k)%ht_size] = key;
807           HD[(h1+k)%ht_size] = a->a + j*bs2;
808           break;
809 #if defined(PETSC_USE_INFO)
810         } else {
811           ct++;
812 #endif
813         }
814       }
815 #if defined(PETSC_USE_INFO)
816       if (k> max) max = k;
817 #endif
818     }
819   }
820   /* Loop Over B */
821   for (i=0; i<b->mbs; i++) {
822     for (j=bi[i]; j<bi[i+1]; j++) {
823       row = i+rstart;
824       col = garray[bj[j]];
825       key = row*Nbs + col + 1;
826       h1  = HASH(ht_size,key,tmp);
827       for (k=0; k<ht_size; k++) {
828         if (!HT[(h1+k)%ht_size]) {
829           HT[(h1+k)%ht_size] = key;
830           HD[(h1+k)%ht_size] = b->a + j*bs2;
831           break;
832 #if defined(PETSC_USE_INFO)
833         } else {
834           ct++;
835 #endif
836         }
837       }
838 #if defined(PETSC_USE_INFO)
839       if (k> max) max = k;
840 #endif
841     }
842   }
843 
844   /* Print Summary */
845 #if defined(PETSC_USE_INFO)
846   for (i=0,j=0; i<ht_size; i++) {
847     if (HT[i]) j++;
848   }
849   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
850 #endif
851   PetscFunctionReturn(0);
852 }
853 
854 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
855 {
856   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
857   PetscErrorCode ierr;
858   PetscInt       nstash,reallocs;
859 
860   PetscFunctionBegin;
861   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
862 
863   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
864   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
865   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
866   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
867   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
868   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
873 {
874   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
875   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
876   PetscErrorCode ierr;
877   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
878   PetscInt       *row,*col;
879   PetscBool      r1,r2,r3,other_disassembled;
880   MatScalar      *val;
881   PetscMPIInt    n;
882 
883   PetscFunctionBegin;
884   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
885   if (!baij->donotstash && !mat->nooffprocentries) {
886     while (1) {
887       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
888       if (!flg) break;
889 
890       for (i=0; i<n;) {
891         /* Now identify the consecutive vals belonging to the same row */
892         for (j=i,rstart=row[j]; j<n; j++) {
893           if (row[j] != rstart) break;
894         }
895         if (j < n) ncols = j-i;
896         else       ncols = n-i;
897         /* Now assemble all these values with a single function call */
898         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
899         i    = j;
900       }
901     }
902     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
903     /* Now process the block-stash. Since the values are stashed column-oriented,
904        set the roworiented flag to column oriented, and after MatSetValues()
905        restore the original flags */
906     r1 = baij->roworiented;
907     r2 = a->roworiented;
908     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
909 
910     baij->roworiented = PETSC_FALSE;
911     a->roworiented    = PETSC_FALSE;
912 
913     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
914     while (1) {
915       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
916       if (!flg) break;
917 
918       for (i=0; i<n;) {
919         /* Now identify the consecutive vals belonging to the same row */
920         for (j=i,rstart=row[j]; j<n; j++) {
921           if (row[j] != rstart) break;
922         }
923         if (j < n) ncols = j-i;
924         else       ncols = n-i;
925         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
926         i    = j;
927       }
928     }
929     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
930 
931     baij->roworiented = r1;
932     a->roworiented    = r2;
933 
934     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
935   }
936 
937   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
938   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
939 
940   /* determine if any processor has disassembled, if so we must
941      also disassemble ourselfs, in order that we may reassemble. */
942   /*
943      if nonzero structure of submatrix B cannot change then we know that
944      no processor disassembled thus we can skip this stuff
945   */
946   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
947     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
948     if (mat->was_assembled && !other_disassembled) {
949       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
950     }
951   }
952 
953   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
954     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
955   }
956   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
957   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
958 
959 #if defined(PETSC_USE_INFO)
960   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
961     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
962 
963     baij->ht_total_ct  = 0;
964     baij->ht_insert_ct = 0;
965   }
966 #endif
967   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
968     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
969 
970     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
971     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
972   }
973 
974   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
975 
976   baij->rowvalues = 0;
977 
978   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
979   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
980     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
981     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
982   }
983   PetscFunctionReturn(0);
984 }
985 
986 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
987 #include <petscdraw.h>
988 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
989 {
990   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
991   PetscErrorCode    ierr;
992   PetscMPIInt       rank = baij->rank;
993   PetscInt          bs   = mat->rmap->bs;
994   PetscBool         iascii,isdraw;
995   PetscViewer       sviewer;
996   PetscViewerFormat format;
997 
998   PetscFunctionBegin;
999   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1000   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1001   if (iascii) {
1002     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1003     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1004       MatInfo info;
1005       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1006       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1007       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1008       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
1009                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
1010       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1011       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1012       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1013       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1014       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1015       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1016       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1017       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1018       PetscFunctionReturn(0);
1019     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1020       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1021       PetscFunctionReturn(0);
1022     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1023       PetscFunctionReturn(0);
1024     }
1025   }
1026 
1027   if (isdraw) {
1028     PetscDraw draw;
1029     PetscBool isnull;
1030     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1031     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1032     if (isnull) PetscFunctionReturn(0);
1033   }
1034 
1035   {
1036     /* assemble the entire matrix onto first processor. */
1037     Mat         A;
1038     Mat_SeqBAIJ *Aloc;
1039     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1040     MatScalar   *a;
1041     const char  *matname;
1042 
1043     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1044     /* Perhaps this should be the type of mat? */
1045     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1046     if (!rank) {
1047       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1048     } else {
1049       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1050     }
1051     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1052     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1053     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1054     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1055 
1056     /* copy over the A part */
1057     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1058     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1059     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1060 
1061     for (i=0; i<mbs; i++) {
1062       rvals[0] = bs*(baij->rstartbs + i);
1063       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1064       for (j=ai[i]; j<ai[i+1]; j++) {
1065         col = (baij->cstartbs+aj[j])*bs;
1066         for (k=0; k<bs; k++) {
1067           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1068           col++; a += bs;
1069         }
1070       }
1071     }
1072     /* copy over the B part */
1073     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1074     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1075     for (i=0; i<mbs; i++) {
1076       rvals[0] = bs*(baij->rstartbs + i);
1077       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1078       for (j=ai[i]; j<ai[i+1]; j++) {
1079         col = baij->garray[aj[j]]*bs;
1080         for (k=0; k<bs; k++) {
1081           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1082           col++; a += bs;
1083         }
1084       }
1085     }
1086     ierr = PetscFree(rvals);CHKERRQ(ierr);
1087     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1088     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1089     /*
1090        Everyone has to call to draw the matrix since the graphics waits are
1091        synchronized across all processors that share the PetscDraw object
1092     */
1093     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1094     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1095     if (!rank) {
1096       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1097       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1098     }
1099     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1100     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1101     ierr = MatDestroy(&A);CHKERRQ(ierr);
1102   }
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1107 {
1108   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1109   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1110   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1111   PetscErrorCode ierr;
1112   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1113   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1114   int            fd;
1115   PetscScalar    *column_values;
1116   FILE           *file;
1117   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1118   PetscInt       message_count,flowcontrolcount;
1119 
1120   PetscFunctionBegin;
1121   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1122   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1123   nz   = bs2*(A->nz + B->nz);
1124   rlen = mat->rmap->n;
1125   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1126   if (!rank) {
1127     header[0] = MAT_FILE_CLASSID;
1128     header[1] = mat->rmap->N;
1129     header[2] = mat->cmap->N;
1130 
1131     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1132     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1133     /* get largest number of rows any processor has */
1134     range = mat->rmap->range;
1135     for (i=1; i<size; i++) {
1136       rlen = PetscMax(rlen,range[i+1] - range[i]);
1137     }
1138   } else {
1139     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1140   }
1141 
1142   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1143   /* compute lengths of each row  */
1144   for (i=0; i<a->mbs; i++) {
1145     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1146   }
1147   /* store the row lengths to the file */
1148   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1149   if (!rank) {
1150     MPI_Status status;
1151     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1152     rlen = (range[1] - range[0])/bs;
1153     for (i=0; i<rlen; i++) {
1154       for (j=0; j<bs; j++) {
1155         row_lens[i*bs+j] = bs*crow_lens[i];
1156       }
1157     }
1158     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1159     for (i=1; i<size; i++) {
1160       rlen = (range[i+1] - range[i])/bs;
1161       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1162       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1163       for (k=0; k<rlen; k++) {
1164         for (j=0; j<bs; j++) {
1165           row_lens[k*bs+j] = bs*crow_lens[k];
1166         }
1167       }
1168       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1169     }
1170     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1171     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1172   } else {
1173     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1174     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1176   }
1177   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1178 
1179   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1180      information needed to make it for each row from a block row. This does require more communication but still not more than
1181      the communication needed for the nonzero values  */
1182   nzmax = nz; /*  space a largest processor needs */
1183   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1184   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1185   cnt   = 0;
1186   for (i=0; i<a->mbs; i++) {
1187     pcnt = cnt;
1188     for (j=B->i[i]; j<B->i[i+1]; j++) {
1189       if ((col = garray[B->j[j]]) > cstart) break;
1190       for (l=0; l<bs; l++) {
1191         column_indices[cnt++] = bs*col+l;
1192       }
1193     }
1194     for (k=A->i[i]; k<A->i[i+1]; k++) {
1195       for (l=0; l<bs; l++) {
1196         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1197       }
1198     }
1199     for (; j<B->i[i+1]; j++) {
1200       for (l=0; l<bs; l++) {
1201         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1202       }
1203     }
1204     len = cnt - pcnt;
1205     for (k=1; k<bs; k++) {
1206       ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr);
1207       cnt += len;
1208     }
1209   }
1210   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1211 
1212   /* store the columns to the file */
1213   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1214   if (!rank) {
1215     MPI_Status status;
1216     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1217     for (i=1; i<size; i++) {
1218       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1219       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1220       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1221       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     }
1223     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1224   } else {
1225     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1226     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1229   }
1230   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1231 
1232   /* load up the numerical values */
1233   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1234   cnt  = 0;
1235   for (i=0; i<a->mbs; i++) {
1236     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1237     for (j=B->i[i]; j<B->i[i+1]; j++) {
1238       if (garray[B->j[j]] > cstart) break;
1239       for (l=0; l<bs; l++) {
1240         for (ll=0; ll<bs; ll++) {
1241           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1242         }
1243       }
1244       cnt += bs;
1245     }
1246     for (k=A->i[i]; k<A->i[i+1]; k++) {
1247       for (l=0; l<bs; l++) {
1248         for (ll=0; ll<bs; ll++) {
1249           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1250         }
1251       }
1252       cnt += bs;
1253     }
1254     for (; j<B->i[i+1]; j++) {
1255       for (l=0; l<bs; l++) {
1256         for (ll=0; ll<bs; ll++) {
1257           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1258         }
1259       }
1260       cnt += bs;
1261     }
1262     cnt += (bs-1)*rlen;
1263   }
1264   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1265 
1266   /* store the column values to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1275       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1276     }
1277     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1278   } else {
1279     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1280     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1283   }
1284   ierr = PetscFree(column_values);CHKERRQ(ierr);
1285 
1286   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1287   if (file) {
1288     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1289   }
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1294 {
1295   PetscErrorCode ierr;
1296   PetscBool      iascii,isdraw,issocket,isbinary;
1297 
1298   PetscFunctionBegin;
1299   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1300   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1301   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1302   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1303   if (iascii || isdraw || issocket) {
1304     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1305   } else if (isbinary) {
1306     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1307   }
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1312 {
1313   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317 #if defined(PETSC_USE_LOG)
1318   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1319 #endif
1320   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1321   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1322   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1323   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1324 #if defined(PETSC_USE_CTABLE)
1325   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1326 #else
1327   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1328 #endif
1329   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1330   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1331   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1332   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1333   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1334   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1335   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1336   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1337 
1338   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1339   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1340   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1341   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1342   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1343   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1344   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1345   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1346   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1347 #if defined(PETSC_HAVE_HYPRE)
1348   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
1349 #endif
1350   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
1351   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1356 {
1357   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1358   PetscErrorCode ierr;
1359   PetscInt       nt;
1360 
1361   PetscFunctionBegin;
1362   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1363   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1364   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1365   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1366   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1367   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1368   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1369   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374 {
1375   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1380   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1381   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1382   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1387 {
1388   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1389   PetscErrorCode ierr;
1390 
1391   PetscFunctionBegin;
1392   /* do nondiagonal part */
1393   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1394   /* do local part */
1395   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1396   /* add partial results together */
1397   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1398   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1403 {
1404   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1405   PetscErrorCode ierr;
1406 
1407   PetscFunctionBegin;
1408   /* do nondiagonal part */
1409   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1410   /* do local part */
1411   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1412   /* add partial results together */
1413   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1414   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 /*
1419   This only works correctly for square matrices where the subblock A->A is the
1420    diagonal block
1421 */
1422 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1423 {
1424   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1429   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1434 {
1435   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1436   PetscErrorCode ierr;
1437 
1438   PetscFunctionBegin;
1439   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1440   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1445 {
1446   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1447   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1448   PetscErrorCode ierr;
1449   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1450   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1451   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1452 
1453   PetscFunctionBegin;
1454   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1455   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1456   mat->getrowactive = PETSC_TRUE;
1457 
1458   if (!mat->rowvalues && (idx || v)) {
1459     /*
1460         allocate enough space to hold information from the longest row.
1461     */
1462     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1463     PetscInt    max = 1,mbs = mat->mbs,tmp;
1464     for (i=0; i<mbs; i++) {
1465       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1466       if (max < tmp) max = tmp;
1467     }
1468     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1469   }
1470   lrow = row - brstart;
1471 
1472   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1473   if (!v)   {pvA = 0; pvB = 0;}
1474   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1475   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1476   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1477   nztot = nzA + nzB;
1478 
1479   cmap = mat->garray;
1480   if (v  || idx) {
1481     if (nztot) {
1482       /* Sort by increasing column numbers, assuming A and B already sorted */
1483       PetscInt imark = -1;
1484       if (v) {
1485         *v = v_p = mat->rowvalues;
1486         for (i=0; i<nzB; i++) {
1487           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1488           else break;
1489         }
1490         imark = i;
1491         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1492         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1493       }
1494       if (idx) {
1495         *idx = idx_p = mat->rowindices;
1496         if (imark > -1) {
1497           for (i=0; i<imark; i++) {
1498             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1499           }
1500         } else {
1501           for (i=0; i<nzB; i++) {
1502             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1503             else break;
1504           }
1505           imark = i;
1506         }
1507         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1508         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1509       }
1510     } else {
1511       if (idx) *idx = 0;
1512       if (v)   *v   = 0;
1513     }
1514   }
1515   *nz  = nztot;
1516   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1517   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1522 {
1523   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1524 
1525   PetscFunctionBegin;
1526   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1527   baij->getrowactive = PETSC_FALSE;
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1532 {
1533   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1538   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1543 {
1544   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1545   Mat            A  = a->A,B = a->B;
1546   PetscErrorCode ierr;
1547   PetscReal      isend[5],irecv[5];
1548 
1549   PetscFunctionBegin;
1550   info->block_size = (PetscReal)matin->rmap->bs;
1551 
1552   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1553 
1554   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1555   isend[3] = info->memory;  isend[4] = info->mallocs;
1556 
1557   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1558 
1559   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1560   isend[3] += info->memory;  isend[4] += info->mallocs;
1561 
1562   if (flag == MAT_LOCAL) {
1563     info->nz_used      = isend[0];
1564     info->nz_allocated = isend[1];
1565     info->nz_unneeded  = isend[2];
1566     info->memory       = isend[3];
1567     info->mallocs      = isend[4];
1568   } else if (flag == MAT_GLOBAL_MAX) {
1569     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1570 
1571     info->nz_used      = irecv[0];
1572     info->nz_allocated = irecv[1];
1573     info->nz_unneeded  = irecv[2];
1574     info->memory       = irecv[3];
1575     info->mallocs      = irecv[4];
1576   } else if (flag == MAT_GLOBAL_SUM) {
1577     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1578 
1579     info->nz_used      = irecv[0];
1580     info->nz_allocated = irecv[1];
1581     info->nz_unneeded  = irecv[2];
1582     info->memory       = irecv[3];
1583     info->mallocs      = irecv[4];
1584   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1585   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1586   info->fill_ratio_needed = 0;
1587   info->factor_mallocs    = 0;
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1592 {
1593   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1594   PetscErrorCode ierr;
1595 
1596   PetscFunctionBegin;
1597   switch (op) {
1598   case MAT_NEW_NONZERO_LOCATIONS:
1599   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1600   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1601   case MAT_KEEP_NONZERO_PATTERN:
1602   case MAT_NEW_NONZERO_LOCATION_ERR:
1603     MatCheckPreallocated(A,1);
1604     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1605     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1606     break;
1607   case MAT_ROW_ORIENTED:
1608     MatCheckPreallocated(A,1);
1609     a->roworiented = flg;
1610 
1611     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1612     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1613     break;
1614   case MAT_NEW_DIAGONALS:
1615   case MAT_SORTED_FULL:
1616     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1617     break;
1618   case MAT_IGNORE_OFF_PROC_ENTRIES:
1619     a->donotstash = flg;
1620     break;
1621   case MAT_USE_HASH_TABLE:
1622     a->ht_flag = flg;
1623     a->ht_fact = 1.39;
1624     break;
1625   case MAT_SYMMETRIC:
1626   case MAT_STRUCTURALLY_SYMMETRIC:
1627   case MAT_HERMITIAN:
1628   case MAT_SUBMAT_SINGLEIS:
1629   case MAT_SYMMETRY_ETERNAL:
1630     MatCheckPreallocated(A,1);
1631     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1632     break;
1633   default:
1634     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1635   }
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1640 {
1641   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1642   Mat_SeqBAIJ    *Aloc;
1643   Mat            B;
1644   PetscErrorCode ierr;
1645   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1646   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1647   MatScalar      *a;
1648 
1649   PetscFunctionBegin;
1650   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1651     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1652     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1653     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1654     /* Do not know preallocation information, but must set block size */
1655     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1656   } else {
1657     B = *matout;
1658   }
1659 
1660   /* copy over the A part */
1661   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1662   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1663   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1664 
1665   for (i=0; i<mbs; i++) {
1666     rvals[0] = bs*(baij->rstartbs + i);
1667     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1668     for (j=ai[i]; j<ai[i+1]; j++) {
1669       col = (baij->cstartbs+aj[j])*bs;
1670       for (k=0; k<bs; k++) {
1671         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1672 
1673         col++; a += bs;
1674       }
1675     }
1676   }
1677   /* copy over the B part */
1678   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1679   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1680   for (i=0; i<mbs; i++) {
1681     rvals[0] = bs*(baij->rstartbs + i);
1682     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1683     for (j=ai[i]; j<ai[i+1]; j++) {
1684       col = baij->garray[aj[j]]*bs;
1685       for (k=0; k<bs; k++) {
1686         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1687         col++;
1688         a += bs;
1689       }
1690     }
1691   }
1692   ierr = PetscFree(rvals);CHKERRQ(ierr);
1693   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695 
1696   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1697   else {
1698     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1699   }
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1704 {
1705   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1706   Mat            a     = baij->A,b = baij->B;
1707   PetscErrorCode ierr;
1708   PetscInt       s1,s2,s3;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1712   if (rr) {
1713     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1714     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1715     /* Overlap communication with computation. */
1716     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1717   }
1718   if (ll) {
1719     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1720     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1721     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1722   }
1723   /* scale  the diagonal block */
1724   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1725 
1726   if (rr) {
1727     /* Do a scatter end and then right scale the off-diagonal block */
1728     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1729     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1730   }
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1735 {
1736   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1737   PetscInt      *lrows;
1738   PetscInt       r, len;
1739   PetscBool      cong;
1740   PetscErrorCode ierr;
1741 
1742   PetscFunctionBegin;
1743   /* get locally owned rows */
1744   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1745   /* fix right hand side if needed */
1746   if (x && b) {
1747     const PetscScalar *xx;
1748     PetscScalar       *bb;
1749 
1750     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1751     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1752     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1753     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1754     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1755   }
1756 
1757   /* actually zap the local rows */
1758   /*
1759         Zero the required rows. If the "diagonal block" of the matrix
1760      is square and the user wishes to set the diagonal we use separate
1761      code so that MatSetValues() is not called for each diagonal allocating
1762      new memory, thus calling lots of mallocs and slowing things down.
1763 
1764   */
1765   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1766   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1767   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1768   if ((diag != 0.0) && cong) {
1769     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1770   } else if (diag != 0.0) {
1771     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1772     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1773        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1774     for (r = 0; r < len; ++r) {
1775       const PetscInt row = lrows[r] + A->rmap->rstart;
1776       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1777     }
1778     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1779     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1780   } else {
1781     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1782   }
1783   ierr = PetscFree(lrows);CHKERRQ(ierr);
1784 
1785   /* only change matrix nonzero state if pattern was allowed to be changed */
1786   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1787     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1788     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1789   }
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1794 {
1795   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1796   PetscErrorCode    ierr;
1797   PetscMPIInt       n = A->rmap->n;
1798   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1799   PetscInt          *lrows,*owners = A->rmap->range;
1800   PetscSFNode       *rrows;
1801   PetscSF           sf;
1802   const PetscScalar *xx;
1803   PetscScalar       *bb,*mask;
1804   Vec               xmask,lmask;
1805   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1806   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1807   PetscScalar       *aa;
1808 
1809   PetscFunctionBegin;
1810   /* Create SF where leaves are input rows and roots are owned rows */
1811   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1812   for (r = 0; r < n; ++r) lrows[r] = -1;
1813   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1814   for (r = 0; r < N; ++r) {
1815     const PetscInt idx   = rows[r];
1816     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1817     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1818       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1819     }
1820     rrows[r].rank  = p;
1821     rrows[r].index = rows[r] - owners[p];
1822   }
1823   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1824   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1825   /* Collect flags for rows to be zeroed */
1826   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1827   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1828   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1829   /* Compress and put in row numbers */
1830   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1831   /* zero diagonal part of matrix */
1832   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1833   /* handle off diagonal part of matrix */
1834   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1835   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1836   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1837   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1838   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1839   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1840   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1841   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1842   if (x) {
1843     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1844     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1845     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1846     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1847   }
1848   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1849   /* remove zeroed rows of off diagonal matrix */
1850   for (i = 0; i < len; ++i) {
1851     row   = lrows[i];
1852     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1853     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1854     for (k = 0; k < count; ++k) {
1855       aa[0] = 0.0;
1856       aa   += bs;
1857     }
1858   }
1859   /* loop over all elements of off process part of matrix zeroing removed columns*/
1860   for (i = 0; i < l->B->rmap->N; ++i) {
1861     row = i/bs;
1862     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1863       for (k = 0; k < bs; ++k) {
1864         col = bs*baij->j[j] + k;
1865         if (PetscAbsScalar(mask[col])) {
1866           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1867           if (x) bb[i] -= aa[0]*xx[col];
1868           aa[0] = 0.0;
1869         }
1870       }
1871     }
1872   }
1873   if (x) {
1874     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1875     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1876   }
1877   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1878   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1879   ierr = PetscFree(lrows);CHKERRQ(ierr);
1880 
1881   /* only change matrix nonzero state if pattern was allowed to be changed */
1882   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1883     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1884     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1885   }
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1890 {
1891   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1892   PetscErrorCode ierr;
1893 
1894   PetscFunctionBegin;
1895   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1900 
1901 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1902 {
1903   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1904   Mat            a,b,c,d;
1905   PetscBool      flg;
1906   PetscErrorCode ierr;
1907 
1908   PetscFunctionBegin;
1909   a = matA->A; b = matA->B;
1910   c = matB->A; d = matB->B;
1911 
1912   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1913   if (flg) {
1914     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1915   }
1916   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1921 {
1922   PetscErrorCode ierr;
1923   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1924   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1925 
1926   PetscFunctionBegin;
1927   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1928   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1929     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1930   } else {
1931     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1932     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1933   }
1934   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1939 {
1940   PetscErrorCode ierr;
1941 
1942   PetscFunctionBegin;
1943   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1948 {
1949   PetscErrorCode ierr;
1950   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1951   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1952   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1953 
1954   PetscFunctionBegin;
1955   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1960 {
1961   PetscErrorCode ierr;
1962   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1963   PetscBLASInt   bnz,one=1;
1964   Mat_SeqBAIJ    *x,*y;
1965   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
1966 
1967   PetscFunctionBegin;
1968   if (str == SAME_NONZERO_PATTERN) {
1969     PetscScalar alpha = a;
1970     x    = (Mat_SeqBAIJ*)xx->A->data;
1971     y    = (Mat_SeqBAIJ*)yy->A->data;
1972     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1973     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1974     x    = (Mat_SeqBAIJ*)xx->B->data;
1975     y    = (Mat_SeqBAIJ*)yy->B->data;
1976     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1977     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1978     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1979   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1980     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1981   } else {
1982     Mat      B;
1983     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1984     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1985     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1986     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1987     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1988     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1989     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1990     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
1991     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1992     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1993     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1994     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1995     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1996     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1997     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1998     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2004 {
2005   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2006   PetscErrorCode ierr;
2007 
2008   PetscFunctionBegin;
2009   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2010   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2015 {
2016   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2017   PetscErrorCode ierr;
2018 
2019   PetscFunctionBegin;
2020   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2021   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2026 {
2027   PetscErrorCode ierr;
2028   IS             iscol_local;
2029   PetscInt       csize;
2030 
2031   PetscFunctionBegin;
2032   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2033   if (call == MAT_REUSE_MATRIX) {
2034     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2035     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2036   } else {
2037     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2038   }
2039   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2040   if (call == MAT_INITIAL_MATRIX) {
2041     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2042     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2043   }
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 /*
2048   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2049   in local and then by concatenating the local matrices the end result.
2050   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
2051   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2052 */
2053 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2054 {
2055   PetscErrorCode ierr;
2056   PetscMPIInt    rank,size;
2057   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2058   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2059   Mat            M,Mreuse;
2060   MatScalar      *vwork,*aa;
2061   MPI_Comm       comm;
2062   IS             isrow_new, iscol_new;
2063   Mat_SeqBAIJ    *aij;
2064 
2065   PetscFunctionBegin;
2066   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2067   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2068   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2069   /* The compression and expansion should be avoided. Doesn't point
2070      out errors, might change the indices, hence buggey */
2071   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2072   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2073 
2074   if (call ==  MAT_REUSE_MATRIX) {
2075     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2076     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2077     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
2078   } else {
2079     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
2080   }
2081   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2082   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2083   /*
2084       m - number of local rows
2085       n - number of columns (same on all processors)
2086       rstart - first row in new global matrix generated
2087   */
2088   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2089   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2090   m    = m/bs;
2091   n    = n/bs;
2092 
2093   if (call == MAT_INITIAL_MATRIX) {
2094     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2095     ii  = aij->i;
2096     jj  = aij->j;
2097 
2098     /*
2099         Determine the number of non-zeros in the diagonal and off-diagonal
2100         portions of the matrix in order to do correct preallocation
2101     */
2102 
2103     /* first get start and end of "diagonal" columns */
2104     if (csize == PETSC_DECIDE) {
2105       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2106       if (mglobal == n*bs) { /* square matrix */
2107         nlocal = m;
2108       } else {
2109         nlocal = n/size + ((n % size) > rank);
2110       }
2111     } else {
2112       nlocal = csize/bs;
2113     }
2114     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2115     rstart = rend - nlocal;
2116     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2117 
2118     /* next, compute all the lengths */
2119     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2120     for (i=0; i<m; i++) {
2121       jend = ii[i+1] - ii[i];
2122       olen = 0;
2123       dlen = 0;
2124       for (j=0; j<jend; j++) {
2125         if (*jj < rstart || *jj >= rend) olen++;
2126         else dlen++;
2127         jj++;
2128       }
2129       olens[i] = olen;
2130       dlens[i] = dlen;
2131     }
2132     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2133     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2134     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2135     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2136     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2137     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2138   } else {
2139     PetscInt ml,nl;
2140 
2141     M    = *newmat;
2142     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2143     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2144     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2145     /*
2146          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2147        rather than the slower MatSetValues().
2148     */
2149     M->was_assembled = PETSC_TRUE;
2150     M->assembled     = PETSC_FALSE;
2151   }
2152   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2153   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2154   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2155   ii   = aij->i;
2156   jj   = aij->j;
2157   aa   = aij->a;
2158   for (i=0; i<m; i++) {
2159     row   = rstart/bs + i;
2160     nz    = ii[i+1] - ii[i];
2161     cwork = jj;     jj += nz;
2162     vwork = aa;     aa += nz*bs*bs;
2163     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2164   }
2165 
2166   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2167   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2168   *newmat = M;
2169 
2170   /* save submatrix used in processor for next request */
2171   if (call ==  MAT_INITIAL_MATRIX) {
2172     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2173     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2174   }
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2179 {
2180   MPI_Comm       comm,pcomm;
2181   PetscInt       clocal_size,nrows;
2182   const PetscInt *rows;
2183   PetscMPIInt    size;
2184   IS             crowp,lcolp;
2185   PetscErrorCode ierr;
2186 
2187   PetscFunctionBegin;
2188   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2189   /* make a collective version of 'rowp' */
2190   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2191   if (pcomm==comm) {
2192     crowp = rowp;
2193   } else {
2194     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2195     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2196     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2197     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2198   }
2199   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2200   /* make a local version of 'colp' */
2201   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2202   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2203   if (size==1) {
2204     lcolp = colp;
2205   } else {
2206     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2207   }
2208   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2209   /* now we just get the submatrix */
2210   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2211   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2212   /* clean up */
2213   if (pcomm!=comm) {
2214     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2215   }
2216   if (size>1) {
2217     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2218   }
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2223 {
2224   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2225   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2226 
2227   PetscFunctionBegin;
2228   if (nghosts) *nghosts = B->nbs;
2229   if (ghosts) *ghosts = baij->garray;
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2234 {
2235   Mat            B;
2236   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2237   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2238   Mat_SeqAIJ     *b;
2239   PetscErrorCode ierr;
2240   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2241   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2242   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2243 
2244   PetscFunctionBegin;
2245   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2246   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2247 
2248   /* ----------------------------------------------------------------
2249      Tell every processor the number of nonzeros per row
2250   */
2251   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2252   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2253     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2254   }
2255   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2256   displs    = recvcounts + size;
2257   for (i=0; i<size; i++) {
2258     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2259     displs[i]     = A->rmap->range[i]/bs;
2260   }
2261 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2262   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2263 #else
2264   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2265   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2266 #endif
2267   /* ---------------------------------------------------------------
2268      Create the sequential matrix of the same type as the local block diagonal
2269   */
2270   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2271   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2272   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2273   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2274   b    = (Mat_SeqAIJ*)B->data;
2275 
2276   /*--------------------------------------------------------------------
2277     Copy my part of matrix column indices over
2278   */
2279   sendcount  = ad->nz + bd->nz;
2280   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2281   a_jsendbuf = ad->j;
2282   b_jsendbuf = bd->j;
2283   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2284   cnt        = 0;
2285   for (i=0; i<n; i++) {
2286 
2287     /* put in lower diagonal portion */
2288     m = bd->i[i+1] - bd->i[i];
2289     while (m > 0) {
2290       /* is it above diagonal (in bd (compressed) numbering) */
2291       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2292       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2293       m--;
2294     }
2295 
2296     /* put in diagonal portion */
2297     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2298       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2299     }
2300 
2301     /* put in upper diagonal portion */
2302     while (m-- > 0) {
2303       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2304     }
2305   }
2306   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2307 
2308   /*--------------------------------------------------------------------
2309     Gather all column indices to all processors
2310   */
2311   for (i=0; i<size; i++) {
2312     recvcounts[i] = 0;
2313     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2314       recvcounts[i] += lens[j];
2315     }
2316   }
2317   displs[0] = 0;
2318   for (i=1; i<size; i++) {
2319     displs[i] = displs[i-1] + recvcounts[i-1];
2320   }
2321 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2322   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2323 #else
2324   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2325 #endif
2326   /*--------------------------------------------------------------------
2327     Assemble the matrix into useable form (note numerical values not yet set)
2328   */
2329   /* set the b->ilen (length of each row) values */
2330   ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
2331   /* set the b->i indices */
2332   b->i[0] = 0;
2333   for (i=1; i<=A->rmap->N/bs; i++) {
2334     b->i[i] = b->i[i-1] + lens[i-1];
2335   }
2336   ierr = PetscFree(lens);CHKERRQ(ierr);
2337   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2338   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2340 
2341   if (A->symmetric) {
2342     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2343   } else if (A->hermitian) {
2344     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2345   } else if (A->structurally_symmetric) {
2346     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2347   }
2348   *newmat = B;
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2353 {
2354   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2355   PetscErrorCode ierr;
2356   Vec            bb1 = 0;
2357 
2358   PetscFunctionBegin;
2359   if (flag == SOR_APPLY_UPPER) {
2360     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2361     PetscFunctionReturn(0);
2362   }
2363 
2364   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2365     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2366   }
2367 
2368   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2369     if (flag & SOR_ZERO_INITIAL_GUESS) {
2370       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2371       its--;
2372     }
2373 
2374     while (its--) {
2375       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2376       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2377 
2378       /* update rhs: bb1 = bb - B*x */
2379       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2380       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2381 
2382       /* local sweep */
2383       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2384     }
2385   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2386     if (flag & SOR_ZERO_INITIAL_GUESS) {
2387       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2388       its--;
2389     }
2390     while (its--) {
2391       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2392       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2393 
2394       /* update rhs: bb1 = bb - B*x */
2395       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2396       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2397 
2398       /* local sweep */
2399       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2400     }
2401   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2402     if (flag & SOR_ZERO_INITIAL_GUESS) {
2403       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2404       its--;
2405     }
2406     while (its--) {
2407       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2408       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2409 
2410       /* update rhs: bb1 = bb - B*x */
2411       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2412       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2413 
2414       /* local sweep */
2415       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2416     }
2417   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2418 
2419   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2424 {
2425   PetscErrorCode ierr;
2426   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2427   PetscInt       N,i,*garray = aij->garray;
2428   PetscInt       ib,jb,bs = A->rmap->bs;
2429   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2430   MatScalar      *a_val = a_aij->a;
2431   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2432   MatScalar      *b_val = b_aij->a;
2433   PetscReal      *work;
2434 
2435   PetscFunctionBegin;
2436   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2437   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2438   if (type == NORM_2) {
2439     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2440       for (jb=0; jb<bs; jb++) {
2441         for (ib=0; ib<bs; ib++) {
2442           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2443           a_val++;
2444         }
2445       }
2446     }
2447     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2448       for (jb=0; jb<bs; jb++) {
2449         for (ib=0; ib<bs; ib++) {
2450           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2451           b_val++;
2452         }
2453       }
2454     }
2455   } else if (type == NORM_1) {
2456     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2457       for (jb=0; jb<bs; jb++) {
2458         for (ib=0; ib<bs; ib++) {
2459           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2460           a_val++;
2461         }
2462       }
2463     }
2464     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2465       for (jb=0; jb<bs; jb++) {
2466        for (ib=0; ib<bs; ib++) {
2467           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2468           b_val++;
2469         }
2470       }
2471     }
2472   } else if (type == NORM_INFINITY) {
2473     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2474       for (jb=0; jb<bs; jb++) {
2475         for (ib=0; ib<bs; ib++) {
2476           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2477           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2478           a_val++;
2479         }
2480       }
2481     }
2482     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2483       for (jb=0; jb<bs; jb++) {
2484         for (ib=0; ib<bs; ib++) {
2485           int col = garray[b_aij->j[i]] * bs + jb;
2486           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2487           b_val++;
2488         }
2489       }
2490     }
2491   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2492   if (type == NORM_INFINITY) {
2493     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2494   } else {
2495     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2496   }
2497   ierr = PetscFree(work);CHKERRQ(ierr);
2498   if (type == NORM_2) {
2499     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2500   }
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2505 {
2506   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2507   PetscErrorCode ierr;
2508 
2509   PetscFunctionBegin;
2510   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2511   A->factorerrortype             = a->A->factorerrortype;
2512   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2513   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2518 {
2519   PetscErrorCode ierr;
2520   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2521   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2522 
2523   PetscFunctionBegin;
2524   if (!Y->preallocated) {
2525     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2526   } else if (!aij->nz) {
2527     PetscInt nonew = aij->nonew;
2528     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2529     aij->nonew = nonew;
2530   }
2531   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2536 {
2537   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2542   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2543   if (d) {
2544     PetscInt rstart;
2545     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2546     *d += rstart/A->rmap->bs;
2547 
2548   }
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2553 {
2554   PetscFunctionBegin;
2555   *a = ((Mat_MPIBAIJ*)A->data)->A;
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 /* -------------------------------------------------------------------*/
2560 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2561                                        MatGetRow_MPIBAIJ,
2562                                        MatRestoreRow_MPIBAIJ,
2563                                        MatMult_MPIBAIJ,
2564                                 /* 4*/ MatMultAdd_MPIBAIJ,
2565                                        MatMultTranspose_MPIBAIJ,
2566                                        MatMultTransposeAdd_MPIBAIJ,
2567                                        0,
2568                                        0,
2569                                        0,
2570                                 /*10*/ 0,
2571                                        0,
2572                                        0,
2573                                        MatSOR_MPIBAIJ,
2574                                        MatTranspose_MPIBAIJ,
2575                                 /*15*/ MatGetInfo_MPIBAIJ,
2576                                        MatEqual_MPIBAIJ,
2577                                        MatGetDiagonal_MPIBAIJ,
2578                                        MatDiagonalScale_MPIBAIJ,
2579                                        MatNorm_MPIBAIJ,
2580                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2581                                        MatAssemblyEnd_MPIBAIJ,
2582                                        MatSetOption_MPIBAIJ,
2583                                        MatZeroEntries_MPIBAIJ,
2584                                 /*24*/ MatZeroRows_MPIBAIJ,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*29*/ MatSetUp_MPIBAIJ,
2590                                        0,
2591                                        0,
2592                                        MatGetDiagonalBlock_MPIBAIJ,
2593                                        0,
2594                                 /*34*/ MatDuplicate_MPIBAIJ,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                 /*39*/ MatAXPY_MPIBAIJ,
2600                                        MatCreateSubMatrices_MPIBAIJ,
2601                                        MatIncreaseOverlap_MPIBAIJ,
2602                                        MatGetValues_MPIBAIJ,
2603                                        MatCopy_MPIBAIJ,
2604                                 /*44*/ 0,
2605                                        MatScale_MPIBAIJ,
2606                                        MatShift_MPIBAIJ,
2607                                        0,
2608                                        MatZeroRowsColumns_MPIBAIJ,
2609                                 /*49*/ 0,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2615                                        0,
2616                                        MatSetUnfactored_MPIBAIJ,
2617                                        MatPermute_MPIBAIJ,
2618                                        MatSetValuesBlocked_MPIBAIJ,
2619                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2620                                        MatDestroy_MPIBAIJ,
2621                                        MatView_MPIBAIJ,
2622                                        0,
2623                                        0,
2624                                 /*64*/ 0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                 /*74*/ 0,
2635                                        MatFDColoringApply_BAIJ,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                 /*79*/ 0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                        MatLoad_MPIBAIJ,
2644                                 /*84*/ 0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                 /*89*/ 0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*94*/ 0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                 /*99*/ 0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                 /*104*/0,
2665                                        MatRealPart_MPIBAIJ,
2666                                        MatImaginaryPart_MPIBAIJ,
2667                                        0,
2668                                        0,
2669                                 /*109*/0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                        MatMissingDiagonal_MPIBAIJ,
2674                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2675                                        0,
2676                                        MatGetGhosts_MPIBAIJ,
2677                                        0,
2678                                        0,
2679                                 /*119*/0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        MatGetMultiProcBlock_MPIBAIJ,
2684                                 /*124*/0,
2685                                        MatGetColumnNorms_MPIBAIJ,
2686                                        MatInvertBlockDiagonal_MPIBAIJ,
2687                                        0,
2688                                        0,
2689                                /*129*/ 0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                /*134*/ 0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                /*139*/ MatSetBlockSizes_Default,
2700                                        0,
2701                                        0,
2702                                        MatFDColoringSetUp_MPIXAIJ,
2703                                        0,
2704                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2705 };
2706 
2707 
2708 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2709 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2710 
2711 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2712 {
2713   PetscInt       m,rstart,cstart,cend;
2714   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2715   const PetscInt *JJ    =0;
2716   PetscScalar    *values=0;
2717   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2718   PetscErrorCode ierr;
2719 
2720   PetscFunctionBegin;
2721   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2722   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2723   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2724   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2725   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2726   m      = B->rmap->n/bs;
2727   rstart = B->rmap->rstart/bs;
2728   cstart = B->cmap->rstart/bs;
2729   cend   = B->cmap->rend/bs;
2730 
2731   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2732   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2733   for (i=0; i<m; i++) {
2734     nz = ii[i+1] - ii[i];
2735     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2736     nz_max = PetscMax(nz_max,nz);
2737     dlen   = 0;
2738     olen   = 0;
2739     JJ     = jj + ii[i];
2740     for (j=0; j<nz; j++) {
2741       if (*JJ < cstart || *JJ >= cend) olen++;
2742       else dlen++;
2743       JJ++;
2744     }
2745     d_nnz[i] = dlen;
2746     o_nnz[i] = olen;
2747   }
2748   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2749   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2750 
2751   values = (PetscScalar*)V;
2752   if (!values) {
2753     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2754   }
2755   for (i=0; i<m; i++) {
2756     PetscInt          row    = i + rstart;
2757     PetscInt          ncols  = ii[i+1] - ii[i];
2758     const PetscInt    *icols = jj + ii[i];
2759     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2760       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2761       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2762     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2763       PetscInt j;
2764       for (j=0; j<ncols; j++) {
2765         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2766         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2767       }
2768     }
2769   }
2770 
2771   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2772   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2773   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2774   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 /*@C
2779    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2780 
2781    Collective
2782 
2783    Input Parameters:
2784 +  B - the matrix
2785 .  bs - the block size
2786 .  i - the indices into j for the start of each local row (starts with zero)
2787 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2788 -  v - optional values in the matrix
2789 
2790    Level: advanced
2791 
2792    Notes:
2793     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2794    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2795    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2796    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2797    block column and the second index is over columns within a block.
2798 
2799    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2800 
2801 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2802 @*/
2803 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2804 {
2805   PetscErrorCode ierr;
2806 
2807   PetscFunctionBegin;
2808   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2809   PetscValidType(B,1);
2810   PetscValidLogicalCollectiveInt(B,bs,2);
2811   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2816 {
2817   Mat_MPIBAIJ    *b;
2818   PetscErrorCode ierr;
2819   PetscInt       i;
2820   PetscMPIInt    size;
2821 
2822   PetscFunctionBegin;
2823   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2824   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2825   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2826   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2827 
2828   if (d_nnz) {
2829     for (i=0; i<B->rmap->n/bs; i++) {
2830       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2831     }
2832   }
2833   if (o_nnz) {
2834     for (i=0; i<B->rmap->n/bs; i++) {
2835       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2836     }
2837   }
2838 
2839   b      = (Mat_MPIBAIJ*)B->data;
2840   b->bs2 = bs*bs;
2841   b->mbs = B->rmap->n/bs;
2842   b->nbs = B->cmap->n/bs;
2843   b->Mbs = B->rmap->N/bs;
2844   b->Nbs = B->cmap->N/bs;
2845 
2846   for (i=0; i<=b->size; i++) {
2847     b->rangebs[i] = B->rmap->range[i]/bs;
2848   }
2849   b->rstartbs = B->rmap->rstart/bs;
2850   b->rendbs   = B->rmap->rend/bs;
2851   b->cstartbs = B->cmap->rstart/bs;
2852   b->cendbs   = B->cmap->rend/bs;
2853 
2854 #if defined(PETSC_USE_CTABLE)
2855   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2856 #else
2857   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2858 #endif
2859   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2860   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2861   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2862 
2863   /* Because the B will have been resized we simply destroy it and create a new one each time */
2864   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2865   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2866   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2867   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2868   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2869   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2870 
2871   if (!B->preallocated) {
2872     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2873     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2874     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2875     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2876     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2877   }
2878 
2879   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2880   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2881   B->preallocated  = PETSC_TRUE;
2882   B->was_assembled = PETSC_FALSE;
2883   B->assembled     = PETSC_FALSE;
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2888 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2889 
2890 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2891 {
2892   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2893   PetscErrorCode ierr;
2894   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2895   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2896   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2897 
2898   PetscFunctionBegin;
2899   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
2900   ii[0] = 0;
2901   for (i=0; i<M; i++) {
2902     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2903     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2904     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2905     /* remove one from count of matrix has diagonal */
2906     for (j=id[i]; j<id[i+1]; j++) {
2907       if (jd[j] == i) {ii[i+1]--;break;}
2908     }
2909   }
2910   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2911   cnt  = 0;
2912   for (i=0; i<M; i++) {
2913     for (j=io[i]; j<io[i+1]; j++) {
2914       if (garray[jo[j]] > rstart) break;
2915       jj[cnt++] = garray[jo[j]];
2916     }
2917     for (k=id[i]; k<id[i+1]; k++) {
2918       if (jd[k] != i) {
2919         jj[cnt++] = rstart + jd[k];
2920       }
2921     }
2922     for (; j<io[i+1]; j++) {
2923       jj[cnt++] = garray[jo[j]];
2924     }
2925   }
2926   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2927   PetscFunctionReturn(0);
2928 }
2929 
2930 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2931 
2932 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2933 
2934 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2935 {
2936   PetscErrorCode ierr;
2937   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2938   Mat            B;
2939   Mat_MPIAIJ     *b;
2940 
2941   PetscFunctionBegin;
2942   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2943 
2944   if (reuse == MAT_REUSE_MATRIX) {
2945     B = *newmat;
2946   } else {
2947     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2948     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2949     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2950     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2951     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2952     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2953   }
2954   b = (Mat_MPIAIJ*) B->data;
2955 
2956   if (reuse == MAT_REUSE_MATRIX) {
2957     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
2958     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
2959   } else {
2960     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2961     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2962     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2963     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2964     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2965     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2966     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2967   }
2968   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2969   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2970 
2971   if (reuse == MAT_INPLACE_MATRIX) {
2972     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
2973   } else {
2974    *newmat = B;
2975   }
2976   PetscFunctionReturn(0);
2977 }
2978 
2979 /*MC
2980    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2981 
2982    Options Database Keys:
2983 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2984 . -mat_block_size <bs> - set the blocksize used to store the matrix
2985 - -mat_use_hash_table <fact>
2986 
2987    Level: beginner
2988 
2989    Notes:
2990     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
2991     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2992 
2993 .seealso: MatCreateMPIBAIJ
2994 M*/
2995 
2996 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2997 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
2998 
2999 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3000 {
3001   Mat_MPIBAIJ    *b;
3002   PetscErrorCode ierr;
3003   PetscBool      flg = PETSC_FALSE;
3004 
3005   PetscFunctionBegin;
3006   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3007   B->data = (void*)b;
3008 
3009   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3010   B->assembled = PETSC_FALSE;
3011 
3012   B->insertmode = NOT_SET_VALUES;
3013   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3014   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3015 
3016   /* build local table of row and column ownerships */
3017   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
3018 
3019   /* build cache for off array entries formed */
3020   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3021 
3022   b->donotstash  = PETSC_FALSE;
3023   b->colmap      = NULL;
3024   b->garray      = NULL;
3025   b->roworiented = PETSC_TRUE;
3026 
3027   /* stuff used in block assembly */
3028   b->barray = 0;
3029 
3030   /* stuff used for matrix vector multiply */
3031   b->lvec  = 0;
3032   b->Mvctx = 0;
3033 
3034   /* stuff for MatGetRow() */
3035   b->rowindices   = 0;
3036   b->rowvalues    = 0;
3037   b->getrowactive = PETSC_FALSE;
3038 
3039   /* hash table stuff */
3040   b->ht           = 0;
3041   b->hd           = 0;
3042   b->ht_size      = 0;
3043   b->ht_flag      = PETSC_FALSE;
3044   b->ht_fact      = 0;
3045   b->ht_total_ct  = 0;
3046   b->ht_insert_ct = 0;
3047 
3048   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
3049   b->ijonly = PETSC_FALSE;
3050 
3051 
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3055 #if defined(PETSC_HAVE_HYPRE)
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3057 #endif
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3066   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3067 
3068   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3069   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
3070   if (flg) {
3071     PetscReal fact = 1.39;
3072     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3073     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3074     if (fact <= 1.0) fact = 1.39;
3075     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3076     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3077   }
3078   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3079   PetscFunctionReturn(0);
3080 }
3081 
3082 /*MC
3083    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3084 
3085    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3086    and MATMPIBAIJ otherwise.
3087 
3088    Options Database Keys:
3089 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3090 
3091   Level: beginner
3092 
3093 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3094 M*/
3095 
3096 /*@C
3097    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3098    (block compressed row).  For good matrix assembly performance
3099    the user should preallocate the matrix storage by setting the parameters
3100    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3101    performance can be increased by more than a factor of 50.
3102 
3103    Collective on Mat
3104 
3105    Input Parameters:
3106 +  B - the matrix
3107 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3108           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3109 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3110            submatrix  (same for all local rows)
3111 .  d_nnz - array containing the number of block nonzeros in the various block rows
3112            of the in diagonal portion of the local (possibly different for each block
3113            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3114            set it even if it is zero.
3115 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3116            submatrix (same for all local rows).
3117 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3118            off-diagonal portion of the local submatrix (possibly different for
3119            each block row) or NULL.
3120 
3121    If the *_nnz parameter is given then the *_nz parameter is ignored
3122 
3123    Options Database Keys:
3124 +   -mat_block_size - size of the blocks to use
3125 -   -mat_use_hash_table <fact>
3126 
3127    Notes:
3128    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3129    than it must be used on all processors that share the object for that argument.
3130 
3131    Storage Information:
3132    For a square global matrix we define each processor's diagonal portion
3133    to be its local rows and the corresponding columns (a square submatrix);
3134    each processor's off-diagonal portion encompasses the remainder of the
3135    local matrix (a rectangular submatrix).
3136 
3137    The user can specify preallocated storage for the diagonal part of
3138    the local submatrix with either d_nz or d_nnz (not both).  Set
3139    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3140    memory allocation.  Likewise, specify preallocated storage for the
3141    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3142 
3143    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3144    the figure below we depict these three local rows and all columns (0-11).
3145 
3146 .vb
3147            0 1 2 3 4 5 6 7 8 9 10 11
3148           --------------------------
3149    row 3  |o o o d d d o o o o  o  o
3150    row 4  |o o o d d d o o o o  o  o
3151    row 5  |o o o d d d o o o o  o  o
3152           --------------------------
3153 .ve
3154 
3155    Thus, any entries in the d locations are stored in the d (diagonal)
3156    submatrix, and any entries in the o locations are stored in the
3157    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3158    stored simply in the MATSEQBAIJ format for compressed row storage.
3159 
3160    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3161    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3162    In general, for PDE problems in which most nonzeros are near the diagonal,
3163    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3164    or you will get TERRIBLE performance; see the users' manual chapter on
3165    matrices.
3166 
3167    You can call MatGetInfo() to get information on how effective the preallocation was;
3168    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3169    You can also run with the option -info and look for messages with the string
3170    malloc in them to see if additional memory allocation was needed.
3171 
3172    Level: intermediate
3173 
3174 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3175 @*/
3176 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3177 {
3178   PetscErrorCode ierr;
3179 
3180   PetscFunctionBegin;
3181   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3182   PetscValidType(B,1);
3183   PetscValidLogicalCollectiveInt(B,bs,2);
3184   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3185   PetscFunctionReturn(0);
3186 }
3187 
3188 /*@C
3189    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3190    (block compressed row).  For good matrix assembly performance
3191    the user should preallocate the matrix storage by setting the parameters
3192    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3193    performance can be increased by more than a factor of 50.
3194 
3195    Collective
3196 
3197    Input Parameters:
3198 +  comm - MPI communicator
3199 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3200           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3201 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3202            This value should be the same as the local size used in creating the
3203            y vector for the matrix-vector product y = Ax.
3204 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3205            This value should be the same as the local size used in creating the
3206            x vector for the matrix-vector product y = Ax.
3207 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3208 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3209 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3210            submatrix  (same for all local rows)
3211 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3212            of the in diagonal portion of the local (possibly different for each block
3213            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3214            and set it even if it is zero.
3215 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3216            submatrix (same for all local rows).
3217 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3218            off-diagonal portion of the local submatrix (possibly different for
3219            each block row) or NULL.
3220 
3221    Output Parameter:
3222 .  A - the matrix
3223 
3224    Options Database Keys:
3225 +   -mat_block_size - size of the blocks to use
3226 -   -mat_use_hash_table <fact>
3227 
3228    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3229    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3230    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3231 
3232    Notes:
3233    If the *_nnz parameter is given then the *_nz parameter is ignored
3234 
3235    A nonzero block is any block that as 1 or more nonzeros in it
3236 
3237    The user MUST specify either the local or global matrix dimensions
3238    (possibly both).
3239 
3240    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3241    than it must be used on all processors that share the object for that argument.
3242 
3243    Storage Information:
3244    For a square global matrix we define each processor's diagonal portion
3245    to be its local rows and the corresponding columns (a square submatrix);
3246    each processor's off-diagonal portion encompasses the remainder of the
3247    local matrix (a rectangular submatrix).
3248 
3249    The user can specify preallocated storage for the diagonal part of
3250    the local submatrix with either d_nz or d_nnz (not both).  Set
3251    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3252    memory allocation.  Likewise, specify preallocated storage for the
3253    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3254 
3255    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3256    the figure below we depict these three local rows and all columns (0-11).
3257 
3258 .vb
3259            0 1 2 3 4 5 6 7 8 9 10 11
3260           --------------------------
3261    row 3  |o o o d d d o o o o  o  o
3262    row 4  |o o o d d d o o o o  o  o
3263    row 5  |o o o d d d o o o o  o  o
3264           --------------------------
3265 .ve
3266 
3267    Thus, any entries in the d locations are stored in the d (diagonal)
3268    submatrix, and any entries in the o locations are stored in the
3269    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3270    stored simply in the MATSEQBAIJ format for compressed row storage.
3271 
3272    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3273    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3274    In general, for PDE problems in which most nonzeros are near the diagonal,
3275    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3276    or you will get TERRIBLE performance; see the users' manual chapter on
3277    matrices.
3278 
3279    Level: intermediate
3280 
3281 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3282 @*/
3283 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3284 {
3285   PetscErrorCode ierr;
3286   PetscMPIInt    size;
3287 
3288   PetscFunctionBegin;
3289   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3290   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3291   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3292   if (size > 1) {
3293     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3294     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3295   } else {
3296     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3297     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3298   }
3299   PetscFunctionReturn(0);
3300 }
3301 
3302 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3303 {
3304   Mat            mat;
3305   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3306   PetscErrorCode ierr;
3307   PetscInt       len=0;
3308 
3309   PetscFunctionBegin;
3310   *newmat = 0;
3311   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3312   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3313   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3314 
3315   mat->factortype   = matin->factortype;
3316   mat->preallocated = PETSC_TRUE;
3317   mat->assembled    = PETSC_TRUE;
3318   mat->insertmode   = NOT_SET_VALUES;
3319 
3320   a             = (Mat_MPIBAIJ*)mat->data;
3321   mat->rmap->bs = matin->rmap->bs;
3322   a->bs2        = oldmat->bs2;
3323   a->mbs        = oldmat->mbs;
3324   a->nbs        = oldmat->nbs;
3325   a->Mbs        = oldmat->Mbs;
3326   a->Nbs        = oldmat->Nbs;
3327 
3328   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3329   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3330 
3331   a->size         = oldmat->size;
3332   a->rank         = oldmat->rank;
3333   a->donotstash   = oldmat->donotstash;
3334   a->roworiented  = oldmat->roworiented;
3335   a->rowindices   = 0;
3336   a->rowvalues    = 0;
3337   a->getrowactive = PETSC_FALSE;
3338   a->barray       = 0;
3339   a->rstartbs     = oldmat->rstartbs;
3340   a->rendbs       = oldmat->rendbs;
3341   a->cstartbs     = oldmat->cstartbs;
3342   a->cendbs       = oldmat->cendbs;
3343 
3344   /* hash table stuff */
3345   a->ht           = 0;
3346   a->hd           = 0;
3347   a->ht_size      = 0;
3348   a->ht_flag      = oldmat->ht_flag;
3349   a->ht_fact      = oldmat->ht_fact;
3350   a->ht_total_ct  = 0;
3351   a->ht_insert_ct = 0;
3352 
3353   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
3354   if (oldmat->colmap) {
3355 #if defined(PETSC_USE_CTABLE)
3356     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3357 #else
3358     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3359     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3360     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
3361 #endif
3362   } else a->colmap = 0;
3363 
3364   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3365     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3366     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3367     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
3368   } else a->garray = 0;
3369 
3370   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3371   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3372   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3373   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3374   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3375 
3376   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3377   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3378   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3379   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3380   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3381   *newmat = mat;
3382   PetscFunctionReturn(0);
3383 }
3384 
3385 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3386 {
3387   PetscErrorCode ierr;
3388   int            fd;
3389   PetscInt       i,nz,j,rstart,rend;
3390   PetscScalar    *vals,*buf;
3391   MPI_Comm       comm;
3392   MPI_Status     status;
3393   PetscMPIInt    rank,size,maxnz;
3394   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3395   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3396   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3397   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3398   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3399   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3400   PetscBool      isbinary;
3401 
3402   PetscFunctionBegin;
3403   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3404   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3405 
3406   /* force binary viewer to load .info file if it has not yet done so */
3407   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3408   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3409   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3410   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3411   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3412   if (bs < 0) bs = 1;
3413 
3414   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3415   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3416   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3417   if (!rank) {
3418     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3419     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3420     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3421   }
3422   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3423   M    = header[1]; N = header[2];
3424 
3425   /* If global sizes are set, check if they are consistent with that given in the file */
3426   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
3427   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
3428 
3429   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3430 
3431   /*
3432      This code adds extra rows to make sure the number of rows is
3433      divisible by the blocksize
3434   */
3435   Mbs        = M/bs;
3436   extra_rows = bs - M + bs*Mbs;
3437   if (extra_rows == bs) extra_rows = 0;
3438   else                  Mbs++;
3439   if (extra_rows && !rank) {
3440     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3441   }
3442 
3443   /* determine ownership of all rows */
3444   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3445     mbs = Mbs/size + ((Mbs % size) > rank);
3446     m   = mbs*bs;
3447   } else { /* User set */
3448     m   = newmat->rmap->n;
3449     mbs = m/bs;
3450   }
3451   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3452   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3453 
3454   /* process 0 needs enough room for process with most rows */
3455   if (!rank) {
3456     mmax = rowners[1];
3457     for (i=2; i<=size; i++) {
3458       mmax = PetscMax(mmax,rowners[i]);
3459     }
3460     mmax*=bs;
3461   } else mmax = -1;             /* unused, but compiler warns anyway */
3462 
3463   rowners[0] = 0;
3464   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3465   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3466   rstart = rowners[rank];
3467   rend   = rowners[rank+1];
3468 
3469   /* distribute row lengths to all processors */
3470   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3471   if (!rank) {
3472     mend = m;
3473     if (size == 1) mend = mend - extra_rows;
3474     ierr = PetscBinaryRead(fd,locrowlens,mend,NULL,PETSC_INT);CHKERRQ(ierr);
3475     for (j=mend; j<m; j++) locrowlens[j] = 1;
3476     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3477     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
3478     procsnz[0] = 0;
3479     for (j=0; j<m; j++) {
3480       procsnz[0] += locrowlens[j];
3481     }
3482     for (i=1; i<size; i++) {
3483       mend = browners[i+1] - browners[i];
3484       if (i == size-1) mend = mend - extra_rows;
3485       ierr = PetscBinaryRead(fd,rowlengths,mend,NULL,PETSC_INT);CHKERRQ(ierr);
3486       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3487       /* calculate the number of nonzeros on each processor */
3488       procsnz[i] = 0;
3489       for (j=0; j<browners[i+1]-browners[i]; j++) {
3490         procsnz[i] += rowlengths[j];
3491       }
3492       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3493     }
3494     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3495   } else {
3496     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3497   }
3498 
3499   if (!rank) {
3500     /* determine max buffer needed and allocate it */
3501     maxnz = procsnz[0];
3502     for (i=1; i<size; i++) {
3503       maxnz = PetscMax(maxnz,procsnz[i]);
3504     }
3505     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3506 
3507     /* read in my part of the matrix column indices  */
3508     nz     = procsnz[0];
3509     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3510     mycols = ibuf;
3511     if (size == 1) nz -= extra_rows;
3512     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3513     if (size == 1) {
3514       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3515     }
3516 
3517     /* read in every ones (except the last) and ship off */
3518     for (i=1; i<size-1; i++) {
3519       nz   = procsnz[i];
3520       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3521       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3522     }
3523     /* read in the stuff for the last proc */
3524     if (size != 1) {
3525       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3526       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3527       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3528       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3529     }
3530     ierr = PetscFree(cols);CHKERRQ(ierr);
3531   } else {
3532     /* determine buffer space needed for message */
3533     nz = 0;
3534     for (i=0; i<m; i++) {
3535       nz += locrowlens[i];
3536     }
3537     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3538     mycols = ibuf;
3539     /* receive message of column indices*/
3540     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3541     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3542     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3543   }
3544 
3545   /* loop over local rows, determining number of off diagonal entries */
3546   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3547   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3548   rowcount = 0; nzcount = 0;
3549   for (i=0; i<mbs; i++) {
3550     dcount  = 0;
3551     odcount = 0;
3552     for (j=0; j<bs; j++) {
3553       kmax = locrowlens[rowcount];
3554       for (k=0; k<kmax; k++) {
3555         tmp = mycols[nzcount++]/bs;
3556         if (!mask[tmp]) {
3557           mask[tmp] = 1;
3558           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3559           else masked1[dcount++] = tmp;
3560         }
3561       }
3562       rowcount++;
3563     }
3564 
3565     dlens[i]  = dcount;
3566     odlens[i] = odcount;
3567 
3568     /* zero out the mask elements we set */
3569     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3570     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3571   }
3572 
3573   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3574   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3575 
3576   if (!rank) {
3577     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3578     /* read in my part of the matrix numerical values  */
3579     nz     = procsnz[0];
3580     vals   = buf;
3581     mycols = ibuf;
3582     if (size == 1) nz -= extra_rows;
3583     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3584     if (size == 1) {
3585       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3586     }
3587 
3588     /* insert into matrix */
3589     jj = rstart*bs;
3590     for (i=0; i<m; i++) {
3591       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3592       mycols += locrowlens[i];
3593       vals   += locrowlens[i];
3594       jj++;
3595     }
3596     /* read in other processors (except the last one) and ship out */
3597     for (i=1; i<size-1; i++) {
3598       nz   = procsnz[i];
3599       vals = buf;
3600       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3601       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3602     }
3603     /* the last proc */
3604     if (size != 1) {
3605       nz   = procsnz[i] - extra_rows;
3606       vals = buf;
3607       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3608       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3609       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3610     }
3611     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3612   } else {
3613     /* receive numeric values */
3614     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3615 
3616     /* receive message of values*/
3617     vals   = buf;
3618     mycols = ibuf;
3619     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3620 
3621     /* insert into matrix */
3622     jj = rstart*bs;
3623     for (i=0; i<m; i++) {
3624       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3625       mycols += locrowlens[i];
3626       vals   += locrowlens[i];
3627       jj++;
3628     }
3629   }
3630   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3631   ierr = PetscFree(buf);CHKERRQ(ierr);
3632   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3633   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3634   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3635   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3636   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3638   PetscFunctionReturn(0);
3639 }
3640 
3641 /*@
3642    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3643 
3644    Input Parameters:
3645 +  mat  - the matrix
3646 -  fact - factor
3647 
3648    Not Collective, each process can use a different factor
3649 
3650    Level: advanced
3651 
3652   Notes:
3653    This can also be set by the command line option: -mat_use_hash_table <fact>
3654 
3655 .seealso: MatSetOption()
3656 @*/
3657 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3658 {
3659   PetscErrorCode ierr;
3660 
3661   PetscFunctionBegin;
3662   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3663   PetscFunctionReturn(0);
3664 }
3665 
3666 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3667 {
3668   Mat_MPIBAIJ *baij;
3669 
3670   PetscFunctionBegin;
3671   baij          = (Mat_MPIBAIJ*)mat->data;
3672   baij->ht_fact = fact;
3673   PetscFunctionReturn(0);
3674 }
3675 
3676 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3677 {
3678   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3679   PetscBool      flg;
3680   PetscErrorCode ierr;
3681 
3682   PetscFunctionBegin;
3683   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3684   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3685   if (Ad)     *Ad     = a->A;
3686   if (Ao)     *Ao     = a->B;
3687   if (colmap) *colmap = a->garray;
3688   PetscFunctionReturn(0);
3689 }
3690 
3691 /*
3692     Special version for direct calls from Fortran (to eliminate two function call overheads
3693 */
3694 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3695 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3696 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3697 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3698 #endif
3699 
3700 /*@C
3701   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3702 
3703   Collective on Mat
3704 
3705   Input Parameters:
3706 + mat - the matrix
3707 . min - number of input rows
3708 . im - input rows
3709 . nin - number of input columns
3710 . in - input columns
3711 . v - numerical values input
3712 - addvin - INSERT_VALUES or ADD_VALUES
3713 
3714   Notes:
3715     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3716 
3717   Level: advanced
3718 
3719 .seealso:   MatSetValuesBlocked()
3720 @*/
3721 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3722 {
3723   /* convert input arguments to C version */
3724   Mat        mat  = *matin;
3725   PetscInt   m    = *min, n = *nin;
3726   InsertMode addv = *addvin;
3727 
3728   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3729   const MatScalar *value;
3730   MatScalar       *barray     = baij->barray;
3731   PetscBool       roworiented = baij->roworiented;
3732   PetscErrorCode  ierr;
3733   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3734   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3735   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3736 
3737   PetscFunctionBegin;
3738   /* tasks normally handled by MatSetValuesBlocked() */
3739   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3740 #if defined(PETSC_USE_DEBUG)
3741   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3742   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3743 #endif
3744   if (mat->assembled) {
3745     mat->was_assembled = PETSC_TRUE;
3746     mat->assembled     = PETSC_FALSE;
3747   }
3748   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3749 
3750 
3751   if (!barray) {
3752     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3753     baij->barray = barray;
3754   }
3755 
3756   if (roworiented) stepval = (n-1)*bs;
3757   else stepval = (m-1)*bs;
3758 
3759   for (i=0; i<m; i++) {
3760     if (im[i] < 0) continue;
3761 #if defined(PETSC_USE_DEBUG)
3762     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3763 #endif
3764     if (im[i] >= rstart && im[i] < rend) {
3765       row = im[i] - rstart;
3766       for (j=0; j<n; j++) {
3767         /* If NumCol = 1 then a copy is not required */
3768         if ((roworiented) && (n == 1)) {
3769           barray = (MatScalar*)v + i*bs2;
3770         } else if ((!roworiented) && (m == 1)) {
3771           barray = (MatScalar*)v + j*bs2;
3772         } else { /* Here a copy is required */
3773           if (roworiented) {
3774             value = v + i*(stepval+bs)*bs + j*bs;
3775           } else {
3776             value = v + j*(stepval+bs)*bs + i*bs;
3777           }
3778           for (ii=0; ii<bs; ii++,value+=stepval) {
3779             for (jj=0; jj<bs; jj++) {
3780               *barray++ = *value++;
3781             }
3782           }
3783           barray -=bs2;
3784         }
3785 
3786         if (in[j] >= cstart && in[j] < cend) {
3787           col  = in[j] - cstart;
3788           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3789         } else if (in[j] < 0) continue;
3790 #if defined(PETSC_USE_DEBUG)
3791         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3792 #endif
3793         else {
3794           if (mat->was_assembled) {
3795             if (!baij->colmap) {
3796               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3797             }
3798 
3799 #if defined(PETSC_USE_DEBUG)
3800 #if defined(PETSC_USE_CTABLE)
3801             { PetscInt data;
3802               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3803               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3804             }
3805 #else
3806             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3807 #endif
3808 #endif
3809 #if defined(PETSC_USE_CTABLE)
3810             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3811             col  = (col - 1)/bs;
3812 #else
3813             col = (baij->colmap[in[j]] - 1)/bs;
3814 #endif
3815             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3816               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3817               col  =  in[j];
3818             }
3819           } else col = in[j];
3820           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3821         }
3822       }
3823     } else {
3824       if (!baij->donotstash) {
3825         if (roworiented) {
3826           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3827         } else {
3828           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3829         }
3830       }
3831     }
3832   }
3833 
3834   /* task normally handled by MatSetValuesBlocked() */
3835   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 /*@
3840      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3841          CSR format the local rows.
3842 
3843    Collective
3844 
3845    Input Parameters:
3846 +  comm - MPI communicator
3847 .  bs - the block size, only a block size of 1 is supported
3848 .  m - number of local rows (Cannot be PETSC_DECIDE)
3849 .  n - This value should be the same as the local size used in creating the
3850        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3851        calculated if N is given) For square matrices n is almost always m.
3852 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3853 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3854 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3855 .   j - column indices
3856 -   a - matrix values
3857 
3858    Output Parameter:
3859 .   mat - the matrix
3860 
3861    Level: intermediate
3862 
3863    Notes:
3864        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3865      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3866      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3867 
3868      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3869      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3870      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3871      with column-major ordering within blocks.
3872 
3873        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3874 
3875 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3876           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3877 @*/
3878 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3879 {
3880   PetscErrorCode ierr;
3881 
3882   PetscFunctionBegin;
3883   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3884   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3885   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3886   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3887   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3888   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3889   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3890   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3891   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3892   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3893   PetscFunctionReturn(0);
3894 }
3895 
3896 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3897 {
3898   PetscErrorCode ierr;
3899   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3900   PetscInt       *indx;
3901   PetscScalar    *values;
3902 
3903   PetscFunctionBegin;
3904   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3905   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3906     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3907     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3908     PetscInt       *bindx,rmax=a->rmax,j;
3909     PetscMPIInt    rank,size;
3910 
3911     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3912     mbs = m/bs; Nbs = N/cbs;
3913     if (n == PETSC_DECIDE) {
3914       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3915     }
3916     nbs = n/cbs;
3917 
3918     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3919     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3920 
3921     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3922     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3923     if (rank == size-1) {
3924       /* Check sum(nbs) = Nbs */
3925       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3926     }
3927 
3928     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3929     for (i=0; i<mbs; i++) {
3930       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3931       nnz = nnz/bs;
3932       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3933       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3934       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3935     }
3936     ierr = PetscFree(bindx);CHKERRQ(ierr);
3937 
3938     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3939     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3940     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3941     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3942     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3943     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3944     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3945   }
3946 
3947   /* numeric phase */
3948   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3949   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3950 
3951   for (i=0; i<m; i++) {
3952     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3953     Ii   = i + rstart;
3954     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3955     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3956   }
3957   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3958   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3959   PetscFunctionReturn(0);
3960 }
3961