xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 534a8f05a7a8aff70dd8cfd53d9cd834400a8dbf)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode ierr;
15   PetscInt       i,*idxb = 0;
16   PetscScalar    *va,*vb;
17   Vec            vtmp;
18 
19   PetscFunctionBegin;
20   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
21   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
22   if (idx) {
23     for (i=0; i<A->rmap->n; i++) {
24       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
25     }
26   }
27 
28   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
29   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
30   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
31   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
32 
33   for (i=0; i<A->rmap->n; i++) {
34     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
35       va[i] = vb[i];
36       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
37     }
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non scalable way since the
73    length of colmap equals the global matrix length.
74 */
75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
76 {
77   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
78   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
79   PetscErrorCode ierr;
80   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
81 
82   PetscFunctionBegin;
83 #if defined(PETSC_USE_CTABLE)
84   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
85   for (i=0; i<nbs; i++) {
86     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
87   }
88 #else
89   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
90   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
91   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
92 #endif
93   PetscFunctionReturn(0);
94 }
95 
/*
   MatSetValues_SeqBAIJ_A_Private - macro that inserts/adds a single scalar
   `value` at point (non-blocked) local position (row,col) of the DIAGONAL
   block A of an MPIBAIJ matrix.  Expects the caller (MatSetValues_MPIBAIJ)
   to have the working variables a, aa, ai, aj, aimax, ailen, bs, bs2, rp, ap,
   bap, brow, bcol, ridx, cidx, low, high, t, _i, nrow, rmax, N and ierr in
   scope.  (orow,ocol) are the original global indices, used only in error
   messages.  Performs a short binary search to narrow the row, then a linear
   scan; reallocates the row via MatSeqXAIJReallocateAIJ when a new nonzero
   block must be created and a->nonew permits it.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
131 
/*
   MatSetValues_SeqBAIJ_B_Private - identical in structure to the A-variant
   above, but operates on the OFF-DIAGONAL block B (arrays b, ba, bi, bj,
   bimax, bilen) and uses the b_noinsert label.  Same caller-supplied working
   variables; (orow,ocol) are the original global indices for error messages.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
167 
/*
   MatSetValues_MPIBAIJ - Inserts or adds values, addressed by POINT (global,
   non-blocked) indices im[]/in[], into a parallel BAIJ matrix.

   Locally owned rows go straight into the diagonal block (A) or off-diagonal
   block (B) via the MatSetValues_SeqBAIJ_{A,B}_Private() macros above, which
   consume the many local working variables declared below.  Off-process rows
   are stashed for exchange during assembly.  Negative indices are silently
   skipped (standard MatSetValues semantics).
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row: split columns into diagonal / off-diagonal parts */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column lands in the diagonal block A (local column numbering) */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* off-diagonal column: translate global -> B-local via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;  /* colmap stores local point column + 1; 0 means absent */
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new off-diagonal block column: revert to unassembled form so
                 B can grow, then fall through using the global index */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs; /* add within-block offset to the block's base */
          } else col = in[j]; /* before first assembly B uses global column indices */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash for communication at assembly time */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
254 
255 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
256 {
257   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
258   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
259   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
260   PetscErrorCode    ierr;
261   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
262   PetscBool         roworiented=a->roworiented;
263   const PetscScalar *value     = v;
264   MatScalar         *ap,*aa = a->a,*bap;
265 
266   PetscFunctionBegin;
267   rp   = aj + ai[row];
268   ap   = aa + bs2*ai[row];
269   rmax = imax[row];
270   nrow = ailen[row];
271   value = v;
272   low = 0;
273   high = nrow;
274   while (high-low > 7) {
275     t = (low+high)/2;
276     if (rp[t] > col) high = t;
277     else             low  = t;
278   }
279   for (i=low; i<high; i++) {
280     if (rp[i] > col) break;
281     if (rp[i] == col) {
282       bap = ap +  bs2*i;
283       if (roworiented) {
284         if (is == ADD_VALUES) {
285           for (ii=0; ii<bs; ii++) {
286             for (jj=ii; jj<bs2; jj+=bs) {
287               bap[jj] += *value++;
288             }
289           }
290         } else {
291           for (ii=0; ii<bs; ii++) {
292             for (jj=ii; jj<bs2; jj+=bs) {
293               bap[jj] = *value++;
294             }
295           }
296         }
297       } else {
298         if (is == ADD_VALUES) {
299           for (ii=0; ii<bs; ii++,value+=bs) {
300             for (jj=0; jj<bs; jj++) {
301               bap[jj] += value[jj];
302             }
303             bap += bs;
304           }
305         } else {
306           for (ii=0; ii<bs; ii++,value+=bs) {
307             for (jj=0; jj<bs; jj++) {
308               bap[jj]  = value[jj];
309             }
310             bap += bs;
311           }
312         }
313       }
314       goto noinsert2;
315     }
316   }
317   if (nonew == 1) goto noinsert2;
318   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
319   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
320   N = nrow++ - 1; high++;
321   /* shift up all the later entries in this row */
322   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
323   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
324   rp[i] = col;
325   bap   = ap +  bs2*i;
326   if (roworiented) {
327     for (ii=0; ii<bs; ii++) {
328       for (jj=ii; jj<bs2; jj+=bs) {
329         bap[jj] = *value++;
330       }
331     }
332   } else {
333     for (ii=0; ii<bs; ii++) {
334       for (jj=0; jj<bs; jj++) {
335         *bap++ = *value++;
336       }
337     }
338   }
339   noinsert2:;
340   ailen[row] = nrow;
341   PetscFunctionReturn(0);
342 }
343 
344 /*
345     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
346     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
347 */
/*
   MatSetValuesBlocked_MPIBAIJ - inserts/adds m x n blocks of values, addressed
   by global BLOCK indices im[]/in[], into a parallel BAIJ matrix.

   Each bs x bs block destined for a locally owned block row is handed to
   MatSetValuesBlocked_SeqBAIJ_Inlined() on either the diagonal (A) or
   off-diagonal (B) sequential block; off-process rows are stashed.  When the
   input layout is not already a contiguous bs2 run, the block is first copied
   into the scratch buffer baij->barray (note: in the contiguous cases barray
   is re-pointed INTO v via a const-discarding cast and never written through).
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the persistent bs x bs scratch block */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive block rows/cols within v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2; /* alias directly into v (read-only use) */
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column owned locally: diagonal block A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* off-diagonal block column: translate through colmap (global block
             column -> local block column of B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs; /* colmap stores point column + 1; convert to block column */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new block column: revert to unassembled form and use the global index */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j]; /* before first assembly B uses global block columns */
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
444 
445 #define HASH_KEY 0.6180339887
446 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
447 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
448 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - hash-table variant of MatSetValues() for MPIBAIJ:
   each locally owned point entry (row,col) is located by hashing its
   (block-row, block-col) key into baij->ht, whose companion array baij->hd
   points directly at the bs2 values of the matching block in A or B.  New
   nonzero locations CANNOT be created here; a missing key is an error.
   Off-process rows are stashed as usual.

   NOTE(review): total_ct/insert_ct start from the stored baij->ht_* values
   and are later added back with +=, which appears to double-count the
   history — verify intended semantics of these debug counters.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table: key is 1 + linearized block position */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe forward from h1, wrapping once around the table */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address:
           HD[idx] points at the block's bs2 values; blocks are column-major */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      /* off-process row: stash for assembly-time communication */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
519 
520 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
521 {
522   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
523   PetscBool         roworiented = baij->roworiented;
524   PetscErrorCode    ierr;
525   PetscInt          i,j,ii,jj,row,col;
526   PetscInt          rstart=baij->rstartbs;
527   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
528   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
529   PetscReal         tmp;
530   MatScalar         **HD = baij->hd,*baij_a;
531   const PetscScalar *v_t,*value;
532 #if defined(PETSC_USE_DEBUG)
533   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
534 #endif
535 
536   PetscFunctionBegin;
537   if (roworiented) stepval = (n-1)*bs;
538   else stepval = (m-1)*bs;
539 
540   for (i=0; i<m; i++) {
541 #if defined(PETSC_USE_DEBUG)
542     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
543     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
544 #endif
545     row = im[i];
546     v_t = v + i*nbs2;
547     if (row >= rstart && row < rend) {
548       for (j=0; j<n; j++) {
549         col = in[j];
550 
551         /* Look up into the Hash Table */
552         key = row*Nbs+col+1;
553         h1  = HASH(size,key,tmp);
554 
555         idx = h1;
556 #if defined(PETSC_USE_DEBUG)
557         total_ct++;
558         insert_ct++;
559         if (HT[idx] != key) {
560           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
561           if (idx == size) {
562             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
563             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
564           }
565         }
566 #else
567         if (HT[idx] != key) {
568           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
569           if (idx == size) {
570             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
571             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
572           }
573         }
574 #endif
575         baij_a = HD[idx];
576         if (roworiented) {
577           /*value = v + i*(stepval+bs)*bs + j*bs;*/
578           /* value = v + (i*(stepval+bs)+j)*bs; */
579           value = v_t;
580           v_t  += bs;
581           if (addv == ADD_VALUES) {
582             for (ii=0; ii<bs; ii++,value+=stepval) {
583               for (jj=ii; jj<bs2; jj+=bs) {
584                 baij_a[jj] += *value++;
585               }
586             }
587           } else {
588             for (ii=0; ii<bs; ii++,value+=stepval) {
589               for (jj=ii; jj<bs2; jj+=bs) {
590                 baij_a[jj] = *value++;
591               }
592             }
593           }
594         } else {
595           value = v + j*(stepval+bs)*bs + i*bs;
596           if (addv == ADD_VALUES) {
597             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
598               for (jj=0; jj<bs; jj++) {
599                 baij_a[jj] += *value++;
600               }
601             }
602           } else {
603             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
604               for (jj=0; jj<bs; jj++) {
605                 baij_a[jj] = *value++;
606               }
607             }
608           }
609         }
610       }
611     } else {
612       if (!baij->donotstash) {
613         if (roworiented) {
614           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
615         } else {
616           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
617         }
618       }
619     }
620   }
621 #if defined(PETSC_USE_DEBUG)
622   baij->ht_total_ct  += total_ct;
623   baij->ht_insert_ct += insert_ct;
624 #endif
625   PetscFunctionReturn(0);
626 }
627 
/*
   MatGetValues_MPIBAIJ - retrieves the m x n values at global point indices
   idxm[] x idxn[] into v (row-major).  Only LOCALLY OWNED rows may be queried;
   an off-process row raises PETSC_ERR_SUP.  Columns in the diagonal range are
   read from baij->A; all others are translated through baij->colmap/garray
   and read from baij->B, with absent entries returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column lies in the diagonal block: use local column numbering */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate global block column via colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--; /* colmap stores local point column + 1; -1 here means absent */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* guard against stale colmap entries: verify garray round-trips to
             the requested global block column before reading from B */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* add within-block offset */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
668 
/*
   MatNorm_MPIBAIJ - computes the Frobenius, 1- (max column sum) or
   infinity-norm (max row sum) of a parallel BAIJ matrix.  Single-process
   matrices delegate to the sequential MatNorm(); otherwise partial sums over
   the local A and B blocks are combined with an MPI reduction.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over every stored scalar in both blocks, then
         all-reduce and take the square root */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      /* NOTE(review): cstart is taken from rstartbs (block-ROW start) and used
         as the global block-COLUMN offset of the diagonal block below; that is
         only correct when row/column block ownership coincide — verify whether
         baij->cstartbs is intended here for rectangular layouts. */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
      /* accumulate |a_ij| into a (non-scalable) global column-sum array;
         blocks are stored column-major, so the inner `row` loop walks one
         stored column of a block */
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j; /* garray maps B's local block column to global */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      /* rows are local, so only a max-reduction is needed at the end;
         sums[] holds the bs partial row sums of the current block row */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
761 
762 /*
763   Creates the hash table, and sets the table
764   This table is created only once.
  If new entries need to be added to the matrix
766   then the hash table has to be destroyed and
767   recreated.
768 */
/*
   MatCreateHashTable_MPIBAIJ_Private - builds the open-addressing hash table
   used by the *_HT MatSetValues variants.  For every stored block of A and B,
   key = (global block row)*Nbs + (global block col) + 1 goes into HT[] and a
   direct pointer to the block's bs2 values goes into the parallel array HD[].
   Collisions are resolved by linear probing with wrap-around.  Table capacity
   is factor*nz slots; no-op if the table already exists.

   NOTE(review): if no free slot is found within ht_size probes the inner loop
   exits without inserting the key and without raising an error — verify that
   factor > 1 is guaranteed by the callers so the table can never fill up.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0; /* collision count and longest probe sequence, for stats */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0); /* already built */

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table (zeroed: 0 is the "empty slot" sentinel,
     which is why keys carry a +1 offset) */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart; /* A stores local block columns; shift to global */

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* linear probing from h1 with wrap-around until an empty slot is found */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]]; /* garray maps B's local block column to global */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
853 
/* Starts communication of stashed off-process entries: both the point stash
   (MatSetValues) and the block stash (MatSetValuesBlocked) are scattered to
   their owning processes; the matching receives happen in MatAssemblyEnd. */
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  /* nothing to communicate if off-process entries were never stashed */
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* point stash is partitioned by scalar row ranges, block stash by block-row ranges */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
871 
/* Completes assembly: drains the point and block stashes into the local
   A/B blocks, assembles A and B, handles collective disassembly, builds the
   multiply machinery on first final assembly, and optionally installs the
   hash-table MatSetValues variants. Collective on the matrix communicator. */
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each message carries (row,col,val) triples owned by this rank */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 scalars, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is true only if every rank was assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build lvec/Mvctx used by the matrix-vector product */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, build the hash table and switch to the hash-table SetValues variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* free work space used by MatGetRow; it is reallocated on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
985 
986 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
987 #include <petscdraw.h>
/* Views the parallel matrix with ASCII, draw, or socket viewers. The info
   formats print per-process statistics and return early; otherwise the whole
   matrix is gathered onto rank 0 and viewed there as a sequential matrix. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero statistics, printed synchronized so ranks appear in order */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns all rows of the gathered copy; other ranks own none */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global scalar rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one scalar column of the block at a time (values are column-major) */
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps B's compressed block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 holds the entire gathered matrix in its local diagonal block */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1105 
1106 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1107 {
1108   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1109   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1110   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1111   PetscErrorCode ierr;
1112   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1113   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1114   int            fd;
1115   PetscScalar    *column_values;
1116   FILE           *file;
1117   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1118   PetscInt       message_count,flowcontrolcount;
1119 
1120   PetscFunctionBegin;
1121   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1122   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1123   nz   = bs2*(A->nz + B->nz);
1124   rlen = mat->rmap->n;
1125   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1126   if (!rank) {
1127     header[0] = MAT_FILE_CLASSID;
1128     header[1] = mat->rmap->N;
1129     header[2] = mat->cmap->N;
1130 
1131     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1132     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1133     /* get largest number of rows any processor has */
1134     range = mat->rmap->range;
1135     for (i=1; i<size; i++) {
1136       rlen = PetscMax(rlen,range[i+1] - range[i]);
1137     }
1138   } else {
1139     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1140   }
1141 
1142   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1143   /* compute lengths of each row  */
1144   for (i=0; i<a->mbs; i++) {
1145     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1146   }
1147   /* store the row lengths to the file */
1148   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1149   if (!rank) {
1150     MPI_Status status;
1151     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1152     rlen = (range[1] - range[0])/bs;
1153     for (i=0; i<rlen; i++) {
1154       for (j=0; j<bs; j++) {
1155         row_lens[i*bs+j] = bs*crow_lens[i];
1156       }
1157     }
1158     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1159     for (i=1; i<size; i++) {
1160       rlen = (range[i+1] - range[i])/bs;
1161       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1162       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1163       for (k=0; k<rlen; k++) {
1164         for (j=0; j<bs; j++) {
1165           row_lens[k*bs+j] = bs*crow_lens[k];
1166         }
1167       }
1168       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1169     }
1170     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1171     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1172   } else {
1173     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1174     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1176   }
1177   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1178 
1179   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1180      information needed to make it for each row from a block row. This does require more communication but still not more than
1181      the communication needed for the nonzero values  */
1182   nzmax = nz; /*  space a largest processor needs */
1183   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1184   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1185   cnt   = 0;
1186   for (i=0; i<a->mbs; i++) {
1187     pcnt = cnt;
1188     for (j=B->i[i]; j<B->i[i+1]; j++) {
1189       if ((col = garray[B->j[j]]) > cstart) break;
1190       for (l=0; l<bs; l++) {
1191         column_indices[cnt++] = bs*col+l;
1192       }
1193     }
1194     for (k=A->i[i]; k<A->i[i+1]; k++) {
1195       for (l=0; l<bs; l++) {
1196         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1197       }
1198     }
1199     for (; j<B->i[i+1]; j++) {
1200       for (l=0; l<bs; l++) {
1201         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1202       }
1203     }
1204     len = cnt - pcnt;
1205     for (k=1; k<bs; k++) {
1206       ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr);
1207       cnt += len;
1208     }
1209   }
1210   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1211 
1212   /* store the columns to the file */
1213   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1214   if (!rank) {
1215     MPI_Status status;
1216     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1217     for (i=1; i<size; i++) {
1218       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1219       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1220       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1221       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     }
1223     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1224   } else {
1225     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1226     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1229   }
1230   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1231 
1232   /* load up the numerical values */
1233   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1234   cnt  = 0;
1235   for (i=0; i<a->mbs; i++) {
1236     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1237     for (j=B->i[i]; j<B->i[i+1]; j++) {
1238       if (garray[B->j[j]] > cstart) break;
1239       for (l=0; l<bs; l++) {
1240         for (ll=0; ll<bs; ll++) {
1241           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1242         }
1243       }
1244       cnt += bs;
1245     }
1246     for (k=A->i[i]; k<A->i[i+1]; k++) {
1247       for (l=0; l<bs; l++) {
1248         for (ll=0; ll<bs; ll++) {
1249           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1250         }
1251       }
1252       cnt += bs;
1253     }
1254     for (; j<B->i[i+1]; j++) {
1255       for (l=0; l<bs; l++) {
1256         for (ll=0; ll<bs; ll++) {
1257           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1258         }
1259       }
1260       cnt += bs;
1261     }
1262     cnt += (bs-1)*rlen;
1263   }
1264   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1265 
1266   /* store the column values to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1275       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1276     }
1277     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1278   } else {
1279     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1280     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1283   }
1284   ierr = PetscFree(column_values);CHKERRQ(ierr);
1285 
1286   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1287   if (file) {
1288     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1289   }
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1294 {
1295   PetscErrorCode ierr;
1296   PetscBool      iascii,isdraw,issocket,isbinary;
1297 
1298   PetscFunctionBegin;
1299   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1300   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1301   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1302   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1303   if (iascii || isdraw || issocket) {
1304     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1305   } else if (isbinary) {
1306     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1307   }
1308   PetscFunctionReturn(0);
1309 }
1310 
/* Frees all storage owned by the parallel BAIJ matrix: the local A/B blocks,
   column map, scatter context, work arrays, hash table, and the composed
   functions registered on the object. */
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or plain array depending on the build configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* clear the type name and all type-specific composed methods */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1354 
1355 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1356 {
1357   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1358   PetscErrorCode ierr;
1359   PetscInt       nt;
1360 
1361   PetscFunctionBegin;
1362   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1363   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1364   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1365   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1366   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1367   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1368   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1369   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
/* Computes zz = yy + A*xx, overlapping the scatter of off-process entries of
   xx with the local (diagonal-block) multiply-add. */
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); /* zz = yy + A_diag*xx, overlapped with the scatter */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); /* zz += B*lvec (off-diagonal contribution) */
  PetscFunctionReturn(0);
}
1385 
/* Computes yy = A^T*xx: the off-diagonal transpose product lands in a->lvec
   and is scattered back (reverse, additive) onto the owning processes. */
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1401 
/* Computes zz = yy + A^T*xx, same communication pattern as
   MatMultTranspose_MPIBAIJ but with the local multiply-add variant. */
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1417 
1418 /*
1419   This only works correctly for square matrices where the subblock A->A is the
1420    diagonal block
1421 */
1422 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1423 {
1424   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1429   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1434 {
1435   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1436   PetscErrorCode ierr;
1437 
1438   PetscFunctionBegin;
1439   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1440   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
/* Returns the global column indices and values of one locally owned scalar
   row, merged from the diagonal (A) and off-diagonal (B) blocks in sorted
   global column order: B entries left of the diagonal block, A entries, then
   the remaining B entries. Must be paired with MatRestoreRow_MPIBAIJ. */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* blocks in block row i */
      if (max < tmp) max = tmp;
    }
    /* freed in MatAssemblyEnd_MPIBAIJ; max*bs2 scalars covers one full scalar row */
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart; /* local scalar row index */

  /* only request from A/B the data the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray; /* maps B's compressed block columns to global block columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* first B entry whose global block column is >= cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already located by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1520 
1521 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1522 {
1523   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1524 
1525   PetscFunctionBegin;
1526   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1527   baij->getrowactive = PETSC_FALSE;
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1532 {
1533   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1538   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1543 {
1544   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1545   Mat            A  = a->A,B = a->B;
1546   PetscErrorCode ierr;
1547   PetscReal      isend[5],irecv[5];
1548 
1549   PetscFunctionBegin;
1550   info->block_size = (PetscReal)matin->rmap->bs;
1551 
1552   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1553 
1554   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1555   isend[3] = info->memory;  isend[4] = info->mallocs;
1556 
1557   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1558 
1559   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1560   isend[3] += info->memory;  isend[4] += info->mallocs;
1561 
1562   if (flag == MAT_LOCAL) {
1563     info->nz_used      = isend[0];
1564     info->nz_allocated = isend[1];
1565     info->nz_unneeded  = isend[2];
1566     info->memory       = isend[3];
1567     info->mallocs      = isend[4];
1568   } else if (flag == MAT_GLOBAL_MAX) {
1569     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1570 
1571     info->nz_used      = irecv[0];
1572     info->nz_allocated = irecv[1];
1573     info->nz_unneeded  = irecv[2];
1574     info->memory       = irecv[3];
1575     info->mallocs      = irecv[4];
1576   } else if (flag == MAT_GLOBAL_SUM) {
1577     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1578 
1579     info->nz_used      = irecv[0];
1580     info->nz_allocated = irecv[1];
1581     info->nz_unneeded  = irecv[2];
1582     info->memory       = irecv[3];
1583     info->mallocs      = irecv[4];
1584   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1585   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1586   info->fill_ratio_needed = 0;
1587   info->factor_mallocs    = 0;
1588   PetscFunctionReturn(0);
1589 }
1590 
/* Set a matrix option on the parallel matrix, forwarding to the sequential
   blocks where appropriate.  Structural options go to both A and B; symmetry
   options only make sense for the diagonal block A; a few options are stored
   directly in the MPI-level data structure. */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-pattern options apply to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* record the orientation locally as well so MatSetValues_MPIBAIJ can use it */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, values destined for other ranks are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default hash-table over-allocation factor */
    break;
  /* symmetry-type options only affect the square diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1638 
/* Transpose the parallel BAIJ matrix.  The transpose is built by walking the
   local blocks of A and B and inserting each bs x bs block (transposed via the
   swapped row/column roles in MatSetValues) into the new matrix B.  No
   preallocation information for the transpose is available, so the new matrix
   relies on dynamic allocation inside MatSetValues. */
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    /* rvals holds the global (point) row indices of block row i */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert one column of the bs x bs block as a row of the transpose;
           `a` advances by bs per column (column-major block storage) */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* off-diagonal block columns are stored compressed; garray maps back to global */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* MAT_INPLACE_MATRIX: replace A's internals with B's and destroy the shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1702 
1703 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1704 {
1705   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1706   Mat            a     = baij->A,b = baij->B;
1707   PetscErrorCode ierr;
1708   PetscInt       s1,s2,s3;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1712   if (rr) {
1713     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1714     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1715     /* Overlap communication with computation. */
1716     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1717   }
1718   if (ll) {
1719     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1720     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1721     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1722   }
1723   /* scale  the diagonal block */
1724   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1725 
1726   if (rr) {
1727     /* Do a scatter end and then right scale the off-diagonal block */
1728     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1729     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1730   }
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1735 {
1736   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1737   PetscInt      *lrows;
1738   PetscInt       r, len;
1739   PetscBool      cong;
1740   PetscErrorCode ierr;
1741 
1742   PetscFunctionBegin;
1743   /* get locally owned rows */
1744   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1745   /* fix right hand side if needed */
1746   if (x && b) {
1747     const PetscScalar *xx;
1748     PetscScalar       *bb;
1749 
1750     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1751     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1752     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1753     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1754     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1755   }
1756 
1757   /* actually zap the local rows */
1758   /*
1759         Zero the required rows. If the "diagonal block" of the matrix
1760      is square and the user wishes to set the diagonal we use separate
1761      code so that MatSetValues() is not called for each diagonal allocating
1762      new memory, thus calling lots of mallocs and slowing things down.
1763 
1764   */
1765   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1766   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1767   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1768   if ((diag != 0.0) && cong) {
1769     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1770   } else if (diag != 0.0) {
1771     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1772     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1773        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1774     for (r = 0; r < len; ++r) {
1775       const PetscInt row = lrows[r] + A->rmap->rstart;
1776       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1777     }
1778     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1779     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1780   } else {
1781     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1782   }
1783   ierr = PetscFree(lrows);CHKERRQ(ierr);
1784 
1785   /* only change matrix nonzero state if pattern was allowed to be changed */
1786   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1787     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1788     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1789   }
1790   PetscFunctionReturn(0);
1791 }
1792 
/* Zero the given global rows AND the corresponding columns of the matrix,
   optionally placing `diag` on the diagonal and adjusting b using x for the
   eliminated columns.  Rows may be specified on any rank; an SF reduction is
   used to inform the owning rank.  The diagonal block is handled by
   MatZeroRowsColumns on l->A; the off-diagonal block is edited in place. */
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build a 0/1 mask over the ghost columns marking eliminated columns */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost values of x are needed to correct b for the eliminated columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    /* stride by bs through the bs x bs blocks to touch one point row */
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          /* move the known value to the right-hand side before zeroing */
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1888 
1889 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1890 {
1891   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1892   PetscErrorCode ierr;
1893 
1894   PetscFunctionBegin;
1895   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1900 
1901 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1902 {
1903   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1904   Mat            a,b,c,d;
1905   PetscBool      flg;
1906   PetscErrorCode ierr;
1907 
1908   PetscFunctionBegin;
1909   a = matA->A; b = matA->B;
1910   c = matB->A; d = matB->B;
1911 
1912   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1913   if (flg) {
1914     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1915   }
1916   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1921 {
1922   PetscErrorCode ierr;
1923   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1924   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1925 
1926   PetscFunctionBegin;
1927   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1928   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1929     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1930   } else {
1931     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1932     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1933   }
1934   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1939 {
1940   PetscErrorCode ierr;
1941 
1942   PetscFunctionBegin;
1943   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1948 {
1949   PetscErrorCode ierr;
1950   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1951   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1952   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1953 
1954   PetscFunctionBegin;
1955   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
/* Compute Y = a*X + Y.  With identical nonzero patterns this is a pair of
   BLAS axpy calls on the raw value arrays of the A and B blocks; with a
   subset pattern the generic routine is used; otherwise a new matrix with
   the merged pattern is preallocated, filled, and swapped into Y. */
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* same pattern: the value arrays are conformal, axpy them directly */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* different patterns: build a correctly preallocated matrix for the union */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* B replaces Y's internals; Y's object identity is preserved for callers */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2002 
2003 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2004 {
2005   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2006   PetscErrorCode ierr;
2007 
2008   PetscFunctionBegin;
2009   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2010   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2015 {
2016   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2017   PetscErrorCode ierr;
2018 
2019   PetscFunctionBegin;
2020   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2021   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2026 {
2027   PetscErrorCode ierr;
2028   IS             iscol_local;
2029   PetscInt       csize;
2030 
2031   PetscFunctionBegin;
2032   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2033   if (call == MAT_REUSE_MATRIX) {
2034     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2035     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2036   } else {
2037     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2038   }
2039   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2040   if (call == MAT_INITIAL_MATRIX) {
2041     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2042     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2043   }
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 /*
2048   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2049   in local and then by concatenating the local matrices the end result.
2050   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
2051   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2052 */
2053 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2054 {
2055   PetscErrorCode ierr;
2056   PetscMPIInt    rank,size;
2057   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2058   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2059   Mat            M,Mreuse;
2060   MatScalar      *vwork,*aa;
2061   MPI_Comm       comm;
2062   IS             isrow_new, iscol_new;
2063   Mat_SeqBAIJ    *aij;
2064 
2065   PetscFunctionBegin;
2066   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2067   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2068   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2069   /* The compression and expansion should be avoided. Doesn't point
2070      out errors, might change the indices, hence buggey */
2071   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2072   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2073 
2074   if (call ==  MAT_REUSE_MATRIX) {
2075     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2076     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2077     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
2078   } else {
2079     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
2080   }
2081   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2082   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2083   /*
2084       m - number of local rows
2085       n - number of columns (same on all processors)
2086       rstart - first row in new global matrix generated
2087   */
2088   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2089   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2090   m    = m/bs;
2091   n    = n/bs;
2092 
2093   if (call == MAT_INITIAL_MATRIX) {
2094     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2095     ii  = aij->i;
2096     jj  = aij->j;
2097 
2098     /*
2099         Determine the number of non-zeros in the diagonal and off-diagonal
2100         portions of the matrix in order to do correct preallocation
2101     */
2102 
2103     /* first get start and end of "diagonal" columns */
2104     if (csize == PETSC_DECIDE) {
2105       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2106       if (mglobal == n*bs) { /* square matrix */
2107         nlocal = m;
2108       } else {
2109         nlocal = n/size + ((n % size) > rank);
2110       }
2111     } else {
2112       nlocal = csize/bs;
2113     }
2114     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2115     rstart = rend - nlocal;
2116     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2117 
2118     /* next, compute all the lengths */
2119     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2120     for (i=0; i<m; i++) {
2121       jend = ii[i+1] - ii[i];
2122       olen = 0;
2123       dlen = 0;
2124       for (j=0; j<jend; j++) {
2125         if (*jj < rstart || *jj >= rend) olen++;
2126         else dlen++;
2127         jj++;
2128       }
2129       olens[i] = olen;
2130       dlens[i] = dlen;
2131     }
2132     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2133     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2134     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2135     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2136     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2137     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2138   } else {
2139     PetscInt ml,nl;
2140 
2141     M    = *newmat;
2142     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2143     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2144     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2145     /*
2146          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2147        rather than the slower MatSetValues().
2148     */
2149     M->was_assembled = PETSC_TRUE;
2150     M->assembled     = PETSC_FALSE;
2151   }
2152   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2153   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2154   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2155   ii   = aij->i;
2156   jj   = aij->j;
2157   aa   = aij->a;
2158   for (i=0; i<m; i++) {
2159     row   = rstart/bs + i;
2160     nz    = ii[i+1] - ii[i];
2161     cwork = jj;     jj += nz;
2162     vwork = aa;     aa += nz*bs*bs;
2163     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2164   }
2165 
2166   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2167   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2168   *newmat = M;
2169 
2170   /* save submatrix used in processor for next request */
2171   if (call ==  MAT_INITIAL_MATRIX) {
2172     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2173     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2174   }
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2179 {
2180   MPI_Comm       comm,pcomm;
2181   PetscInt       clocal_size,nrows;
2182   const PetscInt *rows;
2183   PetscMPIInt    size;
2184   IS             crowp,lcolp;
2185   PetscErrorCode ierr;
2186 
2187   PetscFunctionBegin;
2188   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2189   /* make a collective version of 'rowp' */
2190   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2191   if (pcomm==comm) {
2192     crowp = rowp;
2193   } else {
2194     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2195     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2196     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2197     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2198   }
2199   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2200   /* make a local version of 'colp' */
2201   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2202   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2203   if (size==1) {
2204     lcolp = colp;
2205   } else {
2206     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2207   }
2208   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2209   /* now we just get the submatrix */
2210   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2211   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2212   /* clean up */
2213   if (pcomm!=comm) {
2214     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2215   }
2216   if (size>1) {
2217     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2218   }
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2223 {
2224   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2225   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2226 
2227   PetscFunctionBegin;
2228   if (nghosts) *nghosts = B->nbs;
2229   if (ghosts) *ghosts = baij->garray;
2230   PetscFunctionReturn(0);
2231 }
2232 
/*
   MatGetSeqNonzeroStructure_MPIBAIJ - builds, on every process, a sequential AIJ
   matrix holding the *block* nonzero structure of the entire parallel matrix
   (one AIJ entry per bs x bs block; numerical values are not set).

   Phases:
     1. Allgather the per-block-row nonzero counts (lens).
     2. Locally merge the diagonal (ad) and off-diagonal (bd) column indices of
        the owned rows, in globally sorted order, directly into the new matrix.
     3. Allgather all column indices so each process has every row.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[i] = number of nonzero blocks in global block row i; fill in our rows */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  /* recvcounts and displs share one allocation; counts/offsets are in block rows */
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  /* write our rows directly into B's j array at the slot the gather expects */
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into usable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* carry over symmetry information from the parallel matrix */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2351 
/*
   MatSOR_MPIBAIJ - parallel SOR: only "local" sweep variants are supported.
   Each outer iteration scatters the current solution to ghost values, forms
   bb1 = bb - B*x (off-diagonal contribution moved to the right-hand side),
   and then runs the requested sequential sweep on the diagonal block.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;   /* work vector bb - B*x; created only when needed */

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply-upper is purely local; delegate to the diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless there is a single iteration starting from a zero guess;
     note ~ binds tighter than &, so this tests that SOR_ZERO_INITIAL_GUESS is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-diagonal product vanishes: first sweep uses bb directly */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2422 
2423 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2424 {
2425   PetscErrorCode ierr;
2426   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2427   PetscInt       N,i,*garray = aij->garray;
2428   PetscInt       ib,jb,bs = A->rmap->bs;
2429   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2430   MatScalar      *a_val = a_aij->a;
2431   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2432   MatScalar      *b_val = b_aij->a;
2433   PetscReal      *work;
2434 
2435   PetscFunctionBegin;
2436   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2437   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2438   if (type == NORM_2) {
2439     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2440       for (jb=0; jb<bs; jb++) {
2441         for (ib=0; ib<bs; ib++) {
2442           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2443           a_val++;
2444         }
2445       }
2446     }
2447     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2448       for (jb=0; jb<bs; jb++) {
2449         for (ib=0; ib<bs; ib++) {
2450           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2451           b_val++;
2452         }
2453       }
2454     }
2455   } else if (type == NORM_1) {
2456     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2457       for (jb=0; jb<bs; jb++) {
2458         for (ib=0; ib<bs; ib++) {
2459           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2460           a_val++;
2461         }
2462       }
2463     }
2464     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2465       for (jb=0; jb<bs; jb++) {
2466        for (ib=0; ib<bs; ib++) {
2467           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2468           b_val++;
2469         }
2470       }
2471     }
2472   } else if (type == NORM_INFINITY) {
2473     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2474       for (jb=0; jb<bs; jb++) {
2475         for (ib=0; ib<bs; ib++) {
2476           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2477           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2478           a_val++;
2479         }
2480       }
2481     }
2482     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2483       for (jb=0; jb<bs; jb++) {
2484         for (ib=0; ib<bs; ib++) {
2485           int col = garray[b_aij->j[i]] * bs + jb;
2486           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2487           b_val++;
2488         }
2489       }
2490     }
2491   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2492   if (type == NORM_INFINITY) {
2493     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2494   } else {
2495     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2496   }
2497   ierr = PetscFree(work);CHKERRQ(ierr);
2498   if (type == NORM_2) {
2499     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2500   }
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2505 {
2506   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2507   PetscErrorCode ierr;
2508 
2509   PetscFunctionBegin;
2510   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2511   A->factorerrortype             = a->A->factorerrortype;
2512   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2513   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2518 {
2519   PetscErrorCode ierr;
2520   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2521   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2522 
2523   PetscFunctionBegin;
2524   if (!Y->preallocated) {
2525     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2526   } else if (!aij->nz) {
2527     PetscInt nonew = aij->nonew;
2528     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2529     aij->nonew = nonew;
2530   }
2531   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2536 {
2537   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2542   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2543   if (d) {
2544     PetscInt rstart;
2545     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2546     *d += rstart/A->rmap->bs;
2547 
2548   }
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2553 {
2554   PetscFunctionBegin;
2555   *a = ((Mat_MPIBAIJ*)A->data)->A;
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 /* -------------------------------------------------------------------*/
/* Dispatch table of matrix operations implemented for MATMPIBAIJ; zero entries
   are operations this type does not provide. Index comments match the slot
   numbers in struct _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ 0,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       0,
                                       MatZeroRowsColumns_MPIBAIJ,
                                /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       0,
                                       0,
                                /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*74*/ 0,
                                       MatFDColoringApply_BAIJ,
                                       0,
                                       0,
                                       0,
                                /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPIBAIJ,
                                /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*104*/0,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPIBAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
                                       0,
                                       MatGetGhosts_MPIBAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                /*124*/0,
                                       MatGetColumnNorms_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_Default,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
};
2706 
2707 
2708 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2709 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2710 
/*
   MatMPIBAIJSetPreallocationCSR_MPIBAIJ - preallocates and fills the matrix from
   block-CSR data (ii/jj in block units, V optional block values).

   Two passes over the rows: first count diagonal vs off-diagonal blocks per row
   to preallocate exactly, then insert the values with MatSetValuesBlocked.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* the layout may adjust the block size; re-fetch it before converting to block units */
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  /* count, per block row, how many blocks fall in the diagonal part [cstart,cend) */
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros from a scratch buffer big enough for the widest row */
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* the preallocation is exact, so flag any future new nonzero as an error */
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2777 
2778 /*@C
2779    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2780    (the default parallel PETSc format).
2781 
2782    Collective
2783 
2784    Input Parameters:
2785 +  B - the matrix
2786 .  bs - the block size
2787 .  i - the indices into j for the start of each local row (starts with zero)
2788 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2789 -  v - optional values in the matrix
2790 
2791    Level: developer
2792 
2793    Notes:
2794     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2795    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2796    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2797    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2798    block column and the second index is over columns within a block.
2799 
2800 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2801 @*/
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation if the matrix type provides one */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2813 
2814 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2815 {
2816   Mat_MPIBAIJ    *b;
2817   PetscErrorCode ierr;
2818   PetscInt       i;
2819   PetscMPIInt    size;
2820 
2821   PetscFunctionBegin;
2822   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2823   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2824   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2825   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2826 
2827   if (d_nnz) {
2828     for (i=0; i<B->rmap->n/bs; i++) {
2829       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2830     }
2831   }
2832   if (o_nnz) {
2833     for (i=0; i<B->rmap->n/bs; i++) {
2834       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2835     }
2836   }
2837 
2838   b      = (Mat_MPIBAIJ*)B->data;
2839   b->bs2 = bs*bs;
2840   b->mbs = B->rmap->n/bs;
2841   b->nbs = B->cmap->n/bs;
2842   b->Mbs = B->rmap->N/bs;
2843   b->Nbs = B->cmap->N/bs;
2844 
2845   for (i=0; i<=b->size; i++) {
2846     b->rangebs[i] = B->rmap->range[i]/bs;
2847   }
2848   b->rstartbs = B->rmap->rstart/bs;
2849   b->rendbs   = B->rmap->rend/bs;
2850   b->cstartbs = B->cmap->rstart/bs;
2851   b->cendbs   = B->cmap->rend/bs;
2852 
2853 #if defined(PETSC_USE_CTABLE)
2854   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2855 #else
2856   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2857 #endif
2858   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2859   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2860   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2861 
2862   /* Because the B will have been resized we simply destroy it and create a new one each time */
2863   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2864   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2865   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2866   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2867   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2868   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2869 
2870   if (!B->preallocated) {
2871     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2872     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2873     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2874     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2875     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2876   }
2877 
2878   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2879   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2880   B->preallocated  = PETSC_TRUE;
2881   B->was_assembled = PETSC_FALSE;
2882   B->assembled     = PETSC_FALSE;
2883   PetscFunctionReturn(0);
2884 }
2885 
2886 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2887 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2888 
/*
   MatConvert_MPIBAIJ_MPIAdj - converts to the adjacency (MPIAdj) format over the
   block structure: one vertex per block row, diagonal entries removed, column
   indices emitted in globally sorted order by merging the off-diagonal part
   (before and after) around the diagonal part.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* build the row-offset array; each row's count excludes its diagonal entry if present */
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* fill the column array: off-diagonal columns below the diagonal block,
     then the diagonal part (skipping the diagonal entry), then the rest */
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  /* ii and jj are taken over by the MPIAdj matrix (no values array) */
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2928 
2929 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2930 
2931 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2932 
/*
   MatConvert_MPIBAIJ_MPIAIJ - converts a parallel BAIJ matrix to MPIAIJ by
   converting its diagonal and off-diagonal sequential blocks.

   NOTE(review): in the non-reuse path the input A is disassembled (so its
   off-diagonal block uses global column numbering for the conversion) and then
   reassembled afterwards; A is therefore temporarily modified even for
   MAT_INITIAL_MATRIX. With MAT_INPLACE_MATRIX the result replaces A itself.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create the destination with matching layout and empty preallocation;
       its blocks are replaced below */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    /* disassemble A so its off-diagonal block can be converted, then restore it */
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2977 
2978 /*MC
2979    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2980 
2981    Options Database Keys:
2982 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2983 . -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table during matrix assembly; fact is the factor used to size the table
2985 
2986   Level: beginner
2987 
.seealso: MatCreateBAIJ()
2989 M*/
2990 
2991 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2992 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
2993 
/*
   MatCreate_MPIBAIJ - Type constructor for MATMPIBAIJ.

   Installs the function table, initializes the per-rank data structure to an
   empty (unassembled) state, composes the conversion/preallocation/utility
   methods on the object, and processes the -mat_use_hash_table option.
   Actual storage is allocated later by MatMPIBAIJSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  /* install the MPIBAIJ implementations of the Mat operations */
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff; the table itself is only built if MAT_USE_HASH_TABLE is set */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;


  /* compose type-specific methods so generic Mat routines can dispatch to them */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  /* -mat_use_hash_table: enable the assembly hash table; the option's value, if > 1, sets the table size factor */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;  /* fall back to the default factor for non-sensical values */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3076 
3077 /*MC
3078    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3079 
3080    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3081    and MATMPIBAIJ otherwise.
3082 
3083    Options Database Keys:
3084 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3085 
3086   Level: beginner
3087 
3088 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3089 M*/
3090 
3091 /*@C
3092    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3093    (block compressed row).  For good matrix assembly performance
3094    the user should preallocate the matrix storage by setting the parameters
3095    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3096    performance can be increased by more than a factor of 50.
3097 
3098    Collective on Mat
3099 
3100    Input Parameters:
3101 +  B - the matrix
3102 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3103           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3104 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3105            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
3110 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3111            submatrix (same for all local rows).
3112 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3113            off-diagonal portion of the local submatrix (possibly different for
3114            each block row) or NULL.
3115 
3116    If the *_nnz parameter is given then the *_nz parameter is ignored
3117 
3118    Options Database Keys:
3119 +   -mat_block_size - size of the blocks to use
3120 -   -mat_use_hash_table <fact>
3121 
3122    Notes:
3123    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3125 
3126    Storage Information:
3127    For a square global matrix we define each processor's diagonal portion
3128    to be its local rows and the corresponding columns (a square submatrix);
3129    each processor's off-diagonal portion encompasses the remainder of the
3130    local matrix (a rectangular submatrix).
3131 
3132    The user can specify preallocated storage for the diagonal part of
3133    the local submatrix with either d_nz or d_nnz (not both).  Set
3134    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3135    memory allocation.  Likewise, specify preallocated storage for the
3136    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3137 
3138    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3139    the figure below we depict these three local rows and all columns (0-11).
3140 
3141 .vb
3142            0 1 2 3 4 5 6 7 8 9 10 11
3143           --------------------------
3144    row 3  |o o o d d d o o o o  o  o
3145    row 4  |o o o d d d o o o o  o  o
3146    row 5  |o o o d d d o o o o  o  o
3147           --------------------------
3148 .ve
3149 
3150    Thus, any entries in the d locations are stored in the d (diagonal)
3151    submatrix, and any entries in the o locations are stored in the
3152    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3153    stored simply in the MATSEQBAIJ format for compressed row storage.
3154 
3155    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3156    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3157    In general, for PDE problems in which most nonzeros are near the diagonal,
3158    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3159    or you will get TERRIBLE performance; see the users' manual chapter on
3160    matrices.
3161 
3162    You can call MatGetInfo() to get information on how effective the preallocation was;
3163    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3164    You can also run with the option -info and look for messages with the string
3165    malloc in them to see if additional memory allocation was needed.
3166 
3167    Level: intermediate
3168 
3169 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3170 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);  /* bs must be the same on every rank */
  /* dispatch to the type-specific implementation composed on B (silently a no-op
     for matrix types that do not provide "MatMPIBAIJSetPreallocation_C") */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3182 
3183 /*@C
3184    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3185    (block compressed row).  For good matrix assembly performance
3186    the user should preallocate the matrix storage by setting the parameters
3187    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3188    performance can be increased by more than a factor of 50.
3189 
3190    Collective
3191 
3192    Input Parameters:
3193 +  comm - MPI communicator
3194 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3195           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3196 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3197            This value should be the same as the local size used in creating the
3198            y vector for the matrix-vector product y = Ax.
3199 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3200            This value should be the same as the local size used in creating the
3201            x vector for the matrix-vector product y = Ax.
3202 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3203 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3204 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3205            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3210 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3211            submatrix (same for all local rows).
3212 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3213            off-diagonal portion of the local submatrix (possibly different for
3214            each block row) or NULL.
3215 
3216    Output Parameter:
3217 .  A - the matrix
3218 
3219    Options Database Keys:
3220 +   -mat_block_size - size of the blocks to use
3221 -   -mat_use_hash_table <fact>
3222 
3223    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3224    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3225    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3226 
3227    Notes:
3228    If the *_nnz parameter is given then the *_nz parameter is ignored
3229 
   A nonzero block is any block that has 1 or more nonzeros in it
3231 
3232    The user MUST specify either the local or global matrix dimensions
3233    (possibly both).
3234 
3235    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3237 
3238    Storage Information:
3239    For a square global matrix we define each processor's diagonal portion
3240    to be its local rows and the corresponding columns (a square submatrix);
3241    each processor's off-diagonal portion encompasses the remainder of the
3242    local matrix (a rectangular submatrix).
3243 
3244    The user can specify preallocated storage for the diagonal part of
3245    the local submatrix with either d_nz or d_nnz (not both).  Set
3246    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3247    memory allocation.  Likewise, specify preallocated storage for the
3248    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3249 
3250    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3251    the figure below we depict these three local rows and all columns (0-11).
3252 
3253 .vb
3254            0 1 2 3 4 5 6 7 8 9 10 11
3255           --------------------------
3256    row 3  |o o o d d d o o o o  o  o
3257    row 4  |o o o d d d o o o o  o  o
3258    row 5  |o o o d d d o o o o  o  o
3259           --------------------------
3260 .ve
3261 
3262    Thus, any entries in the d locations are stored in the d (diagonal)
3263    submatrix, and any entries in the o locations are stored in the
3264    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3265    stored simply in the MATSEQBAIJ format for compressed row storage.
3266 
3267    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3268    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3269    In general, for PDE problems in which most nonzeros are near the diagonal,
3270    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3271    or you will get TERRIBLE performance; see the users' manual chapter on
3272    matrices.
3273 
3274    Level: intermediate
3275 
3276 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3277 @*/
3278 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3279 {
3280   PetscErrorCode ierr;
3281   PetscMPIInt    size;
3282 
3283   PetscFunctionBegin;
3284   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3285   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3286   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3287   if (size > 1) {
3288     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3289     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3290   } else {
3291     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3292     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3293   }
3294   PetscFunctionReturn(0);
3295 }
3296 
/*
   MatDuplicate_MPIBAIJ - Deep-copies a MATMPIBAIJ matrix: size/ownership
   metadata, colmap, garray, the communication scatter, and the two sequential
   parts A and B.  Whether numerical values are copied is controlled by
   cpvalues, which is forwarded to MatDuplicate() on the sequential parts.
   Hash-table state and MatGetRow() caches are reset rather than copied.
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* the copy is immediately usable: preallocated and assembled */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size bookkeeping from the original */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the row/column layouts by reference rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() caches are per-matrix scratch space; start empty */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag/factor) but not the table itself */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* copy the global column indices of the off-diagonal part, if any */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  /* fresh stash, plus duplicated work vector and scatter for MatMult */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* deep-copy the two sequential parts; cpvalues decides values vs pattern only */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3379 
3380 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3381 {
3382   PetscErrorCode ierr;
3383   int            fd;
3384   PetscInt       i,nz,j,rstart,rend;
3385   PetscScalar    *vals,*buf;
3386   MPI_Comm       comm;
3387   MPI_Status     status;
3388   PetscMPIInt    rank,size,maxnz;
3389   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3390   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3391   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3392   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3393   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3394   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3395   PetscBool      isbinary;
3396 
3397   PetscFunctionBegin;
3398   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3399   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3400 
3401   /* force binary viewer to load .info file if it has not yet done so */
3402   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3403   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3404   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3405   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3406   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3407   if (bs < 0) bs = 1;
3408 
3409   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3410   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3411   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3412   if (!rank) {
3413     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3414     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3415     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3416   }
3417   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3418   M    = header[1]; N = header[2];
3419 
3420   /* If global sizes are set, check if they are consistent with that given in the file */
3421   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
3422   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
3423 
3424   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3425 
3426   /*
3427      This code adds extra rows to make sure the number of rows is
3428      divisible by the blocksize
3429   */
3430   Mbs        = M/bs;
3431   extra_rows = bs - M + bs*Mbs;
3432   if (extra_rows == bs) extra_rows = 0;
3433   else                  Mbs++;
3434   if (extra_rows && !rank) {
3435     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3436   }
3437 
3438   /* determine ownership of all rows */
3439   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3440     mbs = Mbs/size + ((Mbs % size) > rank);
3441     m   = mbs*bs;
3442   } else { /* User set */
3443     m   = newmat->rmap->n;
3444     mbs = m/bs;
3445   }
3446   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3447   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3448 
3449   /* process 0 needs enough room for process with most rows */
3450   if (!rank) {
3451     mmax = rowners[1];
3452     for (i=2; i<=size; i++) {
3453       mmax = PetscMax(mmax,rowners[i]);
3454     }
3455     mmax*=bs;
3456   } else mmax = -1;             /* unused, but compiler warns anyway */
3457 
3458   rowners[0] = 0;
3459   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3460   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3461   rstart = rowners[rank];
3462   rend   = rowners[rank+1];
3463 
3464   /* distribute row lengths to all processors */
3465   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3466   if (!rank) {
3467     mend = m;
3468     if (size == 1) mend = mend - extra_rows;
3469     ierr = PetscBinaryRead(fd,locrowlens,mend,NULL,PETSC_INT);CHKERRQ(ierr);
3470     for (j=mend; j<m; j++) locrowlens[j] = 1;
3471     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3472     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
3473     procsnz[0] = 0;
3474     for (j=0; j<m; j++) {
3475       procsnz[0] += locrowlens[j];
3476     }
3477     for (i=1; i<size; i++) {
3478       mend = browners[i+1] - browners[i];
3479       if (i == size-1) mend = mend - extra_rows;
3480       ierr = PetscBinaryRead(fd,rowlengths,mend,NULL,PETSC_INT);CHKERRQ(ierr);
3481       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3482       /* calculate the number of nonzeros on each processor */
3483       procsnz[i] = 0;
3484       for (j=0; j<browners[i+1]-browners[i]; j++) {
3485         procsnz[i] += rowlengths[j];
3486       }
3487       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3488     }
3489     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3490   } else {
3491     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3492   }
3493 
3494   if (!rank) {
3495     /* determine max buffer needed and allocate it */
3496     maxnz = procsnz[0];
3497     for (i=1; i<size; i++) {
3498       maxnz = PetscMax(maxnz,procsnz[i]);
3499     }
3500     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3501 
3502     /* read in my part of the matrix column indices  */
3503     nz     = procsnz[0];
3504     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3505     mycols = ibuf;
3506     if (size == 1) nz -= extra_rows;
3507     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3508     if (size == 1) {
3509       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3510     }
3511 
3512     /* read in every ones (except the last) and ship off */
3513     for (i=1; i<size-1; i++) {
3514       nz   = procsnz[i];
3515       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3516       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3517     }
3518     /* read in the stuff for the last proc */
3519     if (size != 1) {
3520       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3521       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3522       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3523       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3524     }
3525     ierr = PetscFree(cols);CHKERRQ(ierr);
3526   } else {
3527     /* determine buffer space needed for message */
3528     nz = 0;
3529     for (i=0; i<m; i++) {
3530       nz += locrowlens[i];
3531     }
3532     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3533     mycols = ibuf;
3534     /* receive message of column indices*/
3535     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3536     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3537     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3538   }
3539 
3540   /* loop over local rows, determining number of off diagonal entries */
3541   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3542   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3543   rowcount = 0; nzcount = 0;
3544   for (i=0; i<mbs; i++) {
3545     dcount  = 0;
3546     odcount = 0;
3547     for (j=0; j<bs; j++) {
3548       kmax = locrowlens[rowcount];
3549       for (k=0; k<kmax; k++) {
3550         tmp = mycols[nzcount++]/bs;
3551         if (!mask[tmp]) {
3552           mask[tmp] = 1;
3553           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3554           else masked1[dcount++] = tmp;
3555         }
3556       }
3557       rowcount++;
3558     }
3559 
3560     dlens[i]  = dcount;
3561     odlens[i] = odcount;
3562 
3563     /* zero out the mask elements we set */
3564     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3565     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3566   }
3567 
3568   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3569   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3570 
3571   if (!rank) {
3572     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3573     /* read in my part of the matrix numerical values  */
3574     nz     = procsnz[0];
3575     vals   = buf;
3576     mycols = ibuf;
3577     if (size == 1) nz -= extra_rows;
3578     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3579     if (size == 1) {
3580       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3581     }
3582 
3583     /* insert into matrix */
3584     jj = rstart*bs;
3585     for (i=0; i<m; i++) {
3586       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3587       mycols += locrowlens[i];
3588       vals   += locrowlens[i];
3589       jj++;
3590     }
3591     /* read in other processors (except the last one) and ship out */
3592     for (i=1; i<size-1; i++) {
3593       nz   = procsnz[i];
3594       vals = buf;
3595       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3596       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3597     }
3598     /* the last proc */
3599     if (size != 1) {
3600       nz   = procsnz[i] - extra_rows;
3601       vals = buf;
3602       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3603       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3604       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3605     }
3606     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3607   } else {
3608     /* receive numeric values */
3609     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3610 
3611     /* receive message of values*/
3612     vals   = buf;
3613     mycols = ibuf;
3614     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3615 
3616     /* insert into matrix */
3617     jj = rstart*bs;
3618     for (i=0; i<m; i++) {
3619       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3620       mycols += locrowlens[i];
3621       vals   += locrowlens[i];
3622       jj++;
3623     }
3624   }
3625   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3626   ierr = PetscFree(buf);CHKERRQ(ierr);
3627   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3628   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3629   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3630   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3631   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3633   PetscFunctionReturn(0);
3634 }
3635 
3636 /*@
3637    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3638 
3639    Input Parameters:
3640 +  mat  - the matrix
3641 -  fact - factor
3642 
3643    Not Collective, each process can use a different factor
3644 
3645    Level: advanced
3646 
3647   Notes:
3648    This can also be set by the command line option: -mat_use_hash_table <fact>
3649 
3650 .seealso: MatSetOption()
3651 @*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (MatSetHashTableFactor_MPIBAIJ for
     MATMPIBAIJ); PetscTryMethod is a no-op for matrix types that do not compose
     "MatSetHashTableFactor_C", so this is safe to call on any Mat. */
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3660 
3661 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3662 {
3663   Mat_MPIBAIJ *baij;
3664 
3665   PetscFunctionBegin;
3666   baij          = (Mat_MPIBAIJ*)mat->data;
3667   baij->ht_fact = fact;
3668   PetscFunctionReturn(0);
3669 }
3670 
3671 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3672 {
3673   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3674   PetscBool      flg;
3675   PetscErrorCode ierr;
3676 
3677   PetscFunctionBegin;
3678   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3679   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3680   if (Ad)     *Ad     = a->A;
3681   if (Ao)     *Ao     = a->B;
3682   if (colmap) *colmap = a->garray;
3683   PetscFunctionReturn(0);
3684 }
3685 
3686 /*
3687     Special version for direct calls from Fortran (to eliminate two function call overheads
3688 */
3689 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3690 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3691 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3692 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3693 #endif
3694 
3695 /*@C
3696   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3697 
3698   Collective on Mat
3699 
3700   Input Parameters:
3701 + mat - the matrix
3702 . min - number of input rows
3703 . im - input rows
3704 . nin - number of input columns
3705 . in - input columns
3706 . v - numerical values input
3707 - addvin - INSERT_VALUES or ADD_VALUES
3708 
3709   Notes:
3710     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3711 
3712   Level: advanced
3713 
3714 .seealso:   MatSetValuesBlocked()
3715 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;     /* scratch buffer holding one bs x bs block */
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    /* inserting after an assembly: remember so we may need MatDisAssemble below */
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);


  /* lazily allocate the bs2-sized scratch block used when a copy of v is needed */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows (resp. columns) of a block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;    /* negative block-row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned by this process */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required: blocks of v are already
           contiguous, so barray can alias v directly.  Note n (resp. m) is
           loop-invariant, so one of these three branches is taken for ALL j,
           and barray never mixes aliased and copied state. */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block into the contiguous scratch buffer */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal portion */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* block column in the off-diagonal portion */
          if (mat->was_assembled) {
            /* matrix was previously assembled: translate the global block column
               through the colmap built for the compressed off-diagonal part */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
            /* colmap stores 1-based scalar columns; 0 means "not present" so col becomes -1 below */
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block: fall back to the un-assembled representation */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3833 
3834 /*@
3835      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3836          CSR format the local rows.
3837 
3838    Collective
3839 
3840    Input Parameters:
3841 +  comm - MPI communicator
3842 .  bs - the block size, only a block size of 1 is supported
3843 .  m - number of local rows (Cannot be PETSC_DECIDE)
3844 .  n - This value should be the same as the local size used in creating the
3845        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3846        calculated if N is given) For square matrices n is almost always m.
3847 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3848 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in block row row-1 of the matrix
3850 .   j - column indices
3851 -   a - matrix values
3852 
3853    Output Parameter:
3854 .   mat - the matrix
3855 
3856    Level: intermediate
3857 
3858    Notes:
3859        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3860      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3861      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3862 
3863      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3864      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3865      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3866      with column-major ordering within blocks.
3867 
3868        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3869 
3870 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3871           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3872 @*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  /* The input values are column-major within each block (see the Notes above the
     declaration), so switch to column orientation while the CSR data is inserted,
     then restore the default row orientation for subsequent user insertions. */
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3890 
3891 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3892 {
3893   PetscErrorCode ierr;
3894   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3895   PetscInt       *indx;
3896   PetscScalar    *values;
3897 
3898   PetscFunctionBegin;
3899   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3900   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3901     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3902     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3903     PetscInt       *bindx,rmax=a->rmax,j;
3904     PetscMPIInt    rank,size;
3905 
3906     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3907     mbs = m/bs; Nbs = N/cbs;
3908     if (n == PETSC_DECIDE) {
3909       nbs  = n;
3910       ierr = PetscSplitOwnership(comm,&nbs,&Nbs);CHKERRQ(ierr);
3911       n    = nbs*cbs;
3912     } else {
3913       nbs = n/cbs;
3914     }
3915 
3916     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3917     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3918 
3919     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3920     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3921     if (rank == size-1) {
3922       /* Check sum(nbs) = Nbs */
3923       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3924     }
3925 
3926     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3927     for (i=0; i<mbs; i++) {
3928       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3929       nnz = nnz/bs;
3930       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3931       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3932       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3933     }
3934     ierr = PetscFree(bindx);CHKERRQ(ierr);
3935 
3936     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3937     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3938     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3939     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3940     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3941     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3942     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3943   }
3944 
3945   /* numeric phase */
3946   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3947   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3948 
3949   for (i=0; i<m; i++) {
3950     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3951     Ii   = i + rstart;
3952     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3953     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3954   }
3955   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3956   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3957   PetscFunctionReturn(0);
3958 }
3959