xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision d2fd7bfc6f0fd2e1d083decbb7cc7d77e16824f0)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode ierr;
15   PetscInt       i,*idxb = 0;
16   PetscScalar    *va,*vb;
17   Vec            vtmp;
18 
19   PetscFunctionBegin;
20   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
21   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
22   if (idx) {
23     for (i=0; i<A->rmap->n; i++) {
24       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
25     }
26   }
27 
28   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
29   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
30   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
31   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
32 
33   for (i=0; i<A->rmap->n; i++) {
34     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
35       va[i] = vb[i];
36       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
37     }
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non scalable way since the
73    length of colmap equals the global matrix length.
74 */
75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
76 {
77   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
78   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
79   PetscErrorCode ierr;
80   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
81 
82   PetscFunctionBegin;
83 #if defined(PETSC_USE_CTABLE)
84   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
85   for (i=0; i<nbs; i++) {
86     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
87   }
88 #else
89   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
90   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
91   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
92 #endif
93   PetscFunctionReturn(0);
94 }
95 
96 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
97   { \
98     brow = row/bs;  \
99     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
100     rmax = aimax[brow]; nrow = ailen[brow]; \
101     bcol = col/bs; \
102     ridx = row % bs; cidx = col % bs; \
103     low  = 0; high = nrow; \
104     while (high-low > 3) { \
105       t = (low+high)/2; \
106       if (rp[t] > bcol) high = t; \
107       else              low  = t; \
108     } \
109     for (_i=low; _i<high; _i++) { \
110       if (rp[_i] > bcol) break; \
111       if (rp[_i] == bcol) { \
112         bap = ap +  bs2*_i + bs*cidx + ridx; \
113         if (addv == ADD_VALUES) *bap += value;  \
114         else                    *bap  = value;  \
115         goto a_noinsert; \
116       } \
117     } \
118     if (a->nonew == 1) goto a_noinsert; \
119     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
120     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
121     N = nrow++ - 1;  \
122     /* shift up all the later entries in this row */ \
123     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
124     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
125     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
126     rp[_i]                      = bcol;  \
127     ap[bs2*_i + bs*cidx + ridx] = value;  \
128 a_noinsert:; \
129     ailen[brow] = nrow; \
130   }
131 
132 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
133   { \
134     brow = row/bs;  \
135     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
136     rmax = bimax[brow]; nrow = bilen[brow]; \
137     bcol = col/bs; \
138     ridx = row % bs; cidx = col % bs; \
139     low  = 0; high = nrow; \
140     while (high-low > 3) { \
141       t = (low+high)/2; \
142       if (rp[t] > bcol) high = t; \
143       else              low  = t; \
144     } \
145     for (_i=low; _i<high; _i++) { \
146       if (rp[_i] > bcol) break; \
147       if (rp[_i] == bcol) { \
148         bap = ap +  bs2*_i + bs*cidx + ridx; \
149         if (addv == ADD_VALUES) *bap += value;  \
150         else                    *bap  = value;  \
151         goto b_noinsert; \
152       } \
153     } \
154     if (b->nonew == 1) goto b_noinsert; \
155     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
156     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
157     N = nrow++ - 1;  \
158     /* shift up all the later entries in this row */ \
159     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
160     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
161     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
162     rp[_i]                      = bcol;  \
163     ap[bs2*_i + bs*cidx + ridx] = value;  \
164 b_noinsert:; \
165     bilen[brow] = nrow; \
166   }
167 
168 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
169 {
170   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
171   MatScalar      value;
172   PetscBool      roworiented = baij->roworiented;
173   PetscErrorCode ierr;
174   PetscInt       i,j,row,col;
175   PetscInt       rstart_orig=mat->rmap->rstart;
176   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
177   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
178 
179   /* Some Variables required in the macro */
180   Mat         A     = baij->A;
181   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
182   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
183   MatScalar   *aa   =a->a;
184 
185   Mat         B     = baij->B;
186   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
187   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
188   MatScalar   *ba   =b->a;
189 
190   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
191   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
192   MatScalar *ap,*bap;
193 
194   PetscFunctionBegin;
195   for (i=0; i<m; i++) {
196     if (im[i] < 0) continue;
197 #if defined(PETSC_USE_DEBUG)
198     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
199 #endif
200     if (im[i] >= rstart_orig && im[i] < rend_orig) {
201       row = im[i] - rstart_orig;
202       for (j=0; j<n; j++) {
203         if (in[j] >= cstart_orig && in[j] < cend_orig) {
204           col = in[j] - cstart_orig;
205           if (roworiented) value = v[i*n+j];
206           else             value = v[i+j*m];
207           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
208         } else if (in[j] < 0) continue;
209 #if defined(PETSC_USE_DEBUG)
210         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
211 #endif
212         else {
213           if (mat->was_assembled) {
214             if (!baij->colmap) {
215               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
216             }
217 #if defined(PETSC_USE_CTABLE)
218             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
219             col  = col - 1;
220 #else
221             col = baij->colmap[in[j]/bs] - 1;
222 #endif
223             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
224               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
225               col  =  in[j];
226               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
227               B    = baij->B;
228               b    = (Mat_SeqBAIJ*)(B)->data;
229               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
230               ba   =b->a;
231             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
232             else col += in[j]%bs;
233           } else col = in[j];
234           if (roworiented) value = v[i*n+j];
235           else             value = v[i+j*m];
236           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
237           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
238         }
239       }
240     } else {
241       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
242       if (!baij->donotstash) {
243         mat->assembled = PETSC_FALSE;
244         if (roworiented) {
245           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
246         } else {
247           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
248         }
249       }
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
255 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
256 {
257   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
258   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
259   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
260   PetscErrorCode    ierr;
261   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
262   PetscBool         roworiented=a->roworiented;
263   const PetscScalar *value     = v;
264   MatScalar         *ap,*aa = a->a,*bap;
265 
266   PetscFunctionBegin;
267   rp   = aj + ai[row];
268   ap   = aa + bs2*ai[row];
269   rmax = imax[row];
270   nrow = ailen[row];
271   value = v;
272   low = 0;
273   high = nrow;
274   while (high-low > 7) {
275     t = (low+high)/2;
276     if (rp[t] > col) high = t;
277     else             low  = t;
278   }
279   for (i=low; i<high; i++) {
280     if (rp[i] > col) break;
281     if (rp[i] == col) {
282       bap = ap +  bs2*i;
283       if (roworiented) {
284         if (is == ADD_VALUES) {
285           for (ii=0; ii<bs; ii++) {
286             for (jj=ii; jj<bs2; jj+=bs) {
287               bap[jj] += *value++;
288             }
289           }
290         } else {
291           for (ii=0; ii<bs; ii++) {
292             for (jj=ii; jj<bs2; jj+=bs) {
293               bap[jj] = *value++;
294             }
295           }
296         }
297       } else {
298         if (is == ADD_VALUES) {
299           for (ii=0; ii<bs; ii++,value+=bs) {
300             for (jj=0; jj<bs; jj++) {
301               bap[jj] += value[jj];
302             }
303             bap += bs;
304           }
305         } else {
306           for (ii=0; ii<bs; ii++,value+=bs) {
307             for (jj=0; jj<bs; jj++) {
308               bap[jj]  = value[jj];
309             }
310             bap += bs;
311           }
312         }
313       }
314       goto noinsert2;
315     }
316   }
317   if (nonew == 1) goto noinsert2;
318   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
319   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
320   N = nrow++ - 1; high++;
321   /* shift up all the later entries in this row */
322   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
323   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
324   rp[i] = col;
325   bap   = ap +  bs2*i;
326   if (roworiented) {
327     for (ii=0; ii<bs; ii++) {
328       for (jj=ii; jj<bs2; jj+=bs) {
329         bap[jj] = *value++;
330       }
331     }
332   } else {
333     for (ii=0; ii<bs; ii++) {
334       for (jj=0; jj<bs; jj++) {
335         *bap++ = *value++;
336       }
337     }
338   }
339   noinsert2:;
340   ailen[row] = nrow;
341   PetscFunctionReturn(0);
342 }
343 
344 /*
345     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
346     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
347 */
348 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
349 {
350   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
351   const PetscScalar *value;
352   MatScalar         *barray     = baij->barray;
353   PetscBool         roworiented = baij->roworiented;
354   PetscErrorCode    ierr;
355   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
356   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
357   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
358 
359   PetscFunctionBegin;
360   if (!barray) {
361     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
362     baij->barray = barray;
363   }
364 
365   if (roworiented) stepval = (n-1)*bs;
366   else stepval = (m-1)*bs;
367 
368   for (i=0; i<m; i++) {
369     if (im[i] < 0) continue;
370 #if defined(PETSC_USE_DEBUG)
371     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
372 #endif
373     if (im[i] >= rstart && im[i] < rend) {
374       row = im[i] - rstart;
375       for (j=0; j<n; j++) {
376         /* If NumCol = 1 then a copy is not required */
377         if ((roworiented) && (n == 1)) {
378           barray = (MatScalar*)v + i*bs2;
379         } else if ((!roworiented) && (m == 1)) {
380           barray = (MatScalar*)v + j*bs2;
381         } else { /* Here a copy is required */
382           if (roworiented) {
383             value = v + (i*(stepval+bs) + j)*bs;
384           } else {
385             value = v + (j*(stepval+bs) + i)*bs;
386           }
387           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
388             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
389             barray += bs;
390           }
391           barray -= bs2;
392         }
393 
394         if (in[j] >= cstart && in[j] < cend) {
395           col  = in[j] - cstart;
396           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
397         } else if (in[j] < 0) continue;
398 #if defined(PETSC_USE_DEBUG)
399         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
400 #endif
401         else {
402           if (mat->was_assembled) {
403             if (!baij->colmap) {
404               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
405             }
406 
407 #if defined(PETSC_USE_DEBUG)
408 #if defined(PETSC_USE_CTABLE)
409             { PetscInt data;
410               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
411               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
412             }
413 #else
414             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
415 #endif
416 #endif
417 #if defined(PETSC_USE_CTABLE)
418             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
419             col  = (col - 1)/bs;
420 #else
421             col = (baij->colmap[in[j]] - 1)/bs;
422 #endif
423             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
424               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
425               col  =  in[j];
426             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
427           } else col = in[j];
428           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
429         }
430       }
431     } else {
432       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
433       if (!baij->donotstash) {
434         if (roworiented) {
435           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
436         } else {
437           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
438         }
439       }
440     }
441   }
442   PetscFunctionReturn(0);
443 }
444 
445 #define HASH_KEY 0.6180339887
446 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
447 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
448 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
449 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
450 {
451   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
452   PetscBool      roworiented = baij->roworiented;
453   PetscErrorCode ierr;
454   PetscInt       i,j,row,col;
455   PetscInt       rstart_orig=mat->rmap->rstart;
456   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
457   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
458   PetscReal      tmp;
459   MatScalar      **HD = baij->hd,value;
460 #if defined(PETSC_USE_DEBUG)
461   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
462 #endif
463 
464   PetscFunctionBegin;
465   for (i=0; i<m; i++) {
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
468     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
469 #endif
470     row = im[i];
471     if (row >= rstart_orig && row < rend_orig) {
472       for (j=0; j<n; j++) {
473         col = in[j];
474         if (roworiented) value = v[i*n+j];
475         else             value = v[i+j*m];
476         /* Look up PetscInto the Hash Table */
477         key = (row/bs)*Nbs+(col/bs)+1;
478         h1  = HASH(size,key,tmp);
479 
480 
481         idx = h1;
482 #if defined(PETSC_USE_DEBUG)
483         insert_ct++;
484         total_ct++;
485         if (HT[idx] != key) {
486           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
487           if (idx == size) {
488             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
489             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
490           }
491         }
492 #else
493         if (HT[idx] != key) {
494           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
495           if (idx == size) {
496             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
497             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498           }
499         }
500 #endif
501         /* A HASH table entry is found, so insert the values at the correct address */
502         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
503         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
504       }
505     } else if (!baij->donotstash) {
506       if (roworiented) {
507         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
508       } else {
509         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
510       }
511     }
512   }
513 #if defined(PETSC_USE_DEBUG)
514   baij->ht_total_ct  += total_ct;
515   baij->ht_insert_ct += insert_ct;
516 #endif
517   PetscFunctionReturn(0);
518 }
519 
520 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
521 {
522   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
523   PetscBool         roworiented = baij->roworiented;
524   PetscErrorCode    ierr;
525   PetscInt          i,j,ii,jj,row,col;
526   PetscInt          rstart=baij->rstartbs;
527   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
528   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
529   PetscReal         tmp;
530   MatScalar         **HD = baij->hd,*baij_a;
531   const PetscScalar *v_t,*value;
532 #if defined(PETSC_USE_DEBUG)
533   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
534 #endif
535 
536   PetscFunctionBegin;
537   if (roworiented) stepval = (n-1)*bs;
538   else stepval = (m-1)*bs;
539 
540   for (i=0; i<m; i++) {
541 #if defined(PETSC_USE_DEBUG)
542     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
543     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
544 #endif
545     row = im[i];
546     v_t = v + i*nbs2;
547     if (row >= rstart && row < rend) {
548       for (j=0; j<n; j++) {
549         col = in[j];
550 
551         /* Look up into the Hash Table */
552         key = row*Nbs+col+1;
553         h1  = HASH(size,key,tmp);
554 
555         idx = h1;
556 #if defined(PETSC_USE_DEBUG)
557         total_ct++;
558         insert_ct++;
559         if (HT[idx] != key) {
560           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
561           if (idx == size) {
562             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
563             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
564           }
565         }
566 #else
567         if (HT[idx] != key) {
568           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
569           if (idx == size) {
570             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
571             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
572           }
573         }
574 #endif
575         baij_a = HD[idx];
576         if (roworiented) {
577           /*value = v + i*(stepval+bs)*bs + j*bs;*/
578           /* value = v + (i*(stepval+bs)+j)*bs; */
579           value = v_t;
580           v_t  += bs;
581           if (addv == ADD_VALUES) {
582             for (ii=0; ii<bs; ii++,value+=stepval) {
583               for (jj=ii; jj<bs2; jj+=bs) {
584                 baij_a[jj] += *value++;
585               }
586             }
587           } else {
588             for (ii=0; ii<bs; ii++,value+=stepval) {
589               for (jj=ii; jj<bs2; jj+=bs) {
590                 baij_a[jj] = *value++;
591               }
592             }
593           }
594         } else {
595           value = v + j*(stepval+bs)*bs + i*bs;
596           if (addv == ADD_VALUES) {
597             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
598               for (jj=0; jj<bs; jj++) {
599                 baij_a[jj] += *value++;
600               }
601             }
602           } else {
603             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
604               for (jj=0; jj<bs; jj++) {
605                 baij_a[jj] = *value++;
606               }
607             }
608           }
609         }
610       }
611     } else {
612       if (!baij->donotstash) {
613         if (roworiented) {
614           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
615         } else {
616           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
617         }
618       }
619     }
620   }
621 #if defined(PETSC_USE_DEBUG)
622   baij->ht_total_ct  += total_ct;
623   baij->ht_insert_ct += insert_ct;
624 #endif
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
633   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
640       row = idxm[i] - bsrstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= bscstart && idxn[j] < bscend) {
645           col  = idxn[j] - bscstart;
646           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!baij->colmap) {
649             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
653           data--;
654 #else
655           data = baij->colmap[idxn[j]/bs]-1;
656 #endif
657           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
658           else {
659             col  = data + idxn[j]%bs;
660             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
661           }
662         }
663       }
664     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
670 {
671   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
672   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
673   PetscErrorCode ierr;
674   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
675   PetscReal      sum = 0.0;
676   MatScalar      *v;
677 
678   PetscFunctionBegin;
679   if (baij->size == 1) {
680     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
681   } else {
682     if (type == NORM_FROBENIUS) {
683       v  = amat->a;
684       nz = amat->nz*bs2;
685       for (i=0; i<nz; i++) {
686         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
687       }
688       v  = bmat->a;
689       nz = bmat->nz*bs2;
690       for (i=0; i<nz; i++) {
691         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
692       }
693       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
694       *nrm = PetscSqrtReal(*nrm);
695     } else if (type == NORM_1) { /* max column sum */
696       PetscReal *tmp,*tmp2;
697       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
698       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
699       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
700       v    = amat->a; jj = amat->j;
701       for (i=0; i<amat->nz; i++) {
702         for (j=0; j<bs; j++) {
703           col = bs*(cstart + *jj) + j; /* column index */
704           for (row=0; row<bs; row++) {
705             tmp[col] += PetscAbsScalar(*v);  v++;
706           }
707         }
708         jj++;
709       }
710       v = bmat->a; jj = bmat->j;
711       for (i=0; i<bmat->nz; i++) {
712         for (j=0; j<bs; j++) {
713           col = bs*garray[*jj] + j;
714           for (row=0; row<bs; row++) {
715             tmp[col] += PetscAbsScalar(*v); v++;
716           }
717         }
718         jj++;
719       }
720       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
721       *nrm = 0.0;
722       for (j=0; j<mat->cmap->N; j++) {
723         if (tmp2[j] > *nrm) *nrm = tmp2[j];
724       }
725       ierr = PetscFree(tmp);CHKERRQ(ierr);
726       ierr = PetscFree(tmp2);CHKERRQ(ierr);
727     } else if (type == NORM_INFINITY) { /* max row sum */
728       PetscReal *sums;
729       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
730       sum  = 0.0;
731       for (j=0; j<amat->mbs; j++) {
732         for (row=0; row<bs; row++) sums[row] = 0.0;
733         v  = amat->a + bs2*amat->i[j];
734         nz = amat->i[j+1]-amat->i[j];
735         for (i=0; i<nz; i++) {
736           for (col=0; col<bs; col++) {
737             for (row=0; row<bs; row++) {
738               sums[row] += PetscAbsScalar(*v); v++;
739             }
740           }
741         }
742         v  = bmat->a + bs2*bmat->i[j];
743         nz = bmat->i[j+1]-bmat->i[j];
744         for (i=0; i<nz; i++) {
745           for (col=0; col<bs; col++) {
746             for (row=0; row<bs; row++) {
747               sums[row] += PetscAbsScalar(*v); v++;
748             }
749           }
750         }
751         for (row=0; row<bs; row++) {
752           if (sums[row] > sum) sum = sums[row];
753         }
754       }
755       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
756       ierr = PetscFree(sums);CHKERRQ(ierr);
757     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 /*
763   Creates the hash table, and sets the table
764   This table is created only once.
765   If new entried need to be added to the matrix
766   then the hash table has to be destroyed and
767   recreated.
768 */
769 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
770 {
771   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
772   Mat            A     = baij->A,B=baij->B;
773   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
774   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
775   PetscErrorCode ierr;
776   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
777   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
778   PetscInt       *HT,key;
779   MatScalar      **HD;
780   PetscReal      tmp;
781 #if defined(PETSC_USE_INFO)
782   PetscInt ct=0,max=0;
783 #endif
784 
785   PetscFunctionBegin;
786   if (baij->ht) PetscFunctionReturn(0);
787 
788   baij->ht_size = (PetscInt)(factor*nz);
789   ht_size       = baij->ht_size;
790 
791   /* Allocate Memory for Hash Table */
792   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
793   HD   = baij->hd;
794   HT   = baij->ht;
795 
796   /* Loop Over A */
797   for (i=0; i<a->mbs; i++) {
798     for (j=ai[i]; j<ai[i+1]; j++) {
799       row = i+rstart;
800       col = aj[j]+cstart;
801 
802       key = row*Nbs + col + 1;
803       h1  = HASH(ht_size,key,tmp);
804       for (k=0; k<ht_size; k++) {
805         if (!HT[(h1+k)%ht_size]) {
806           HT[(h1+k)%ht_size] = key;
807           HD[(h1+k)%ht_size] = a->a + j*bs2;
808           break;
809 #if defined(PETSC_USE_INFO)
810         } else {
811           ct++;
812 #endif
813         }
814       }
815 #if defined(PETSC_USE_INFO)
816       if (k> max) max = k;
817 #endif
818     }
819   }
820   /* Loop Over B */
821   for (i=0; i<b->mbs; i++) {
822     for (j=bi[i]; j<bi[i+1]; j++) {
823       row = i+rstart;
824       col = garray[bj[j]];
825       key = row*Nbs + col + 1;
826       h1  = HASH(ht_size,key,tmp);
827       for (k=0; k<ht_size; k++) {
828         if (!HT[(h1+k)%ht_size]) {
829           HT[(h1+k)%ht_size] = key;
830           HD[(h1+k)%ht_size] = b->a + j*bs2;
831           break;
832 #if defined(PETSC_USE_INFO)
833         } else {
834           ct++;
835 #endif
836         }
837       }
838 #if defined(PETSC_USE_INFO)
839       if (k> max) max = k;
840 #endif
841     }
842   }
843 
844   /* Print Summary */
845 #if defined(PETSC_USE_INFO)
846   for (i=0,j=0; i<ht_size; i++) {
847     if (HT[i]) j++;
848   }
849   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
850 #endif
851   PetscFunctionReturn(0);
852 }
853 
854 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
855 {
856   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
857   PetscErrorCode ierr;
858   PetscInt       nstash,reallocs;
859 
860   PetscFunctionBegin;
861   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
862 
863   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
864   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
865   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
866   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
867   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
868   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
873 {
874   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
875   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
876   PetscErrorCode ierr;
877   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
878   PetscInt       *row,*col;
879   PetscBool      r1,r2,r3,other_disassembled;
880   MatScalar      *val;
881   PetscMPIInt    n;
882 
883   PetscFunctionBegin;
884   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
885   if (!baij->donotstash && !mat->nooffprocentries) {
886     while (1) {
887       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
888       if (!flg) break;
889 
890       for (i=0; i<n;) {
891         /* Now identify the consecutive vals belonging to the same row */
892         for (j=i,rstart=row[j]; j<n; j++) {
893           if (row[j] != rstart) break;
894         }
895         if (j < n) ncols = j-i;
896         else       ncols = n-i;
897         /* Now assemble all these values with a single function call */
898         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
899         i    = j;
900       }
901     }
902     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
903     /* Now process the block-stash. Since the values are stashed column-oriented,
904        set the roworiented flag to column oriented, and after MatSetValues()
905        restore the original flags */
906     r1 = baij->roworiented;
907     r2 = a->roworiented;
908     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
909 
910     baij->roworiented = PETSC_FALSE;
911     a->roworiented    = PETSC_FALSE;
912 
913     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
914     while (1) {
915       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
916       if (!flg) break;
917 
918       for (i=0; i<n;) {
919         /* Now identify the consecutive vals belonging to the same row */
920         for (j=i,rstart=row[j]; j<n; j++) {
921           if (row[j] != rstart) break;
922         }
923         if (j < n) ncols = j-i;
924         else       ncols = n-i;
925         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
926         i    = j;
927       }
928     }
929     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
930 
931     baij->roworiented = r1;
932     a->roworiented    = r2;
933 
934     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
935   }
936 
937   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
938   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
939 
940   /* determine if any processor has disassembled, if so we must
941      also disassemble ourselfs, in order that we may reassemble. */
942   /*
943      if nonzero structure of submatrix B cannot change then we know that
944      no processor disassembled thus we can skip this stuff
945   */
946   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
947     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
948     if (mat->was_assembled && !other_disassembled) {
949       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
950     }
951   }
952 
953   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
954     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
955   }
956   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
957   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
958 
959 #if defined(PETSC_USE_INFO)
960   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
961     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
962 
963     baij->ht_total_ct  = 0;
964     baij->ht_insert_ct = 0;
965   }
966 #endif
967   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
968     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
969 
970     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
971     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
972   }
973 
974   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
975 
976   baij->rowvalues = 0;
977 
978   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
979   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
980     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
981     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
982   }
983   PetscFunctionReturn(0);
984 }
985 
986 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
987 #include <petscdraw.h>
988 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
989 {
990   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
991   PetscErrorCode    ierr;
992   PetscMPIInt       rank = baij->rank;
993   PetscInt          bs   = mat->rmap->bs;
994   PetscBool         iascii,isdraw;
995   PetscViewer       sviewer;
996   PetscViewerFormat format;
997 
998   PetscFunctionBegin;
999   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1000   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1001   if (iascii) {
1002     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1003     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1004       MatInfo info;
1005       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1006       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1007       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1008       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
1009                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
1010       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1011       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1012       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1013       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1014       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1015       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1016       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1017       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1018       PetscFunctionReturn(0);
1019     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1020       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1021       PetscFunctionReturn(0);
1022     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1023       PetscFunctionReturn(0);
1024     }
1025   }
1026 
1027   if (isdraw) {
1028     PetscDraw draw;
1029     PetscBool isnull;
1030     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1031     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1032     if (isnull) PetscFunctionReturn(0);
1033   }
1034 
1035   {
1036     /* assemble the entire matrix onto first processor. */
1037     Mat         A;
1038     Mat_SeqBAIJ *Aloc;
1039     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1040     MatScalar   *a;
1041     const char  *matname;
1042 
1043     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1044     /* Perhaps this should be the type of mat? */
1045     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1046     if (!rank) {
1047       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1048     } else {
1049       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1050     }
1051     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1052     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1053     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1054     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1055 
1056     /* copy over the A part */
1057     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1058     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1059     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1060 
1061     for (i=0; i<mbs; i++) {
1062       rvals[0] = bs*(baij->rstartbs + i);
1063       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1064       for (j=ai[i]; j<ai[i+1]; j++) {
1065         col = (baij->cstartbs+aj[j])*bs;
1066         for (k=0; k<bs; k++) {
1067           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1068           col++; a += bs;
1069         }
1070       }
1071     }
1072     /* copy over the B part */
1073     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1074     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1075     for (i=0; i<mbs; i++) {
1076       rvals[0] = bs*(baij->rstartbs + i);
1077       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1078       for (j=ai[i]; j<ai[i+1]; j++) {
1079         col = baij->garray[aj[j]]*bs;
1080         for (k=0; k<bs; k++) {
1081           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1082           col++; a += bs;
1083         }
1084       }
1085     }
1086     ierr = PetscFree(rvals);CHKERRQ(ierr);
1087     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1088     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1089     /*
1090        Everyone has to call to draw the matrix since the graphics waits are
1091        synchronized across all processors that share the PetscDraw object
1092     */
1093     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1094     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1095     if (!rank) {
1096       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1097       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1098     }
1099     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1100     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1101     ierr = MatDestroy(&A);CHKERRQ(ierr);
1102   }
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1107 {
1108   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1109   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1110   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1111   PetscErrorCode ierr;
1112   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1113   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1114   int            fd;
1115   PetscScalar    *column_values;
1116   FILE           *file;
1117   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1118   PetscInt       message_count,flowcontrolcount;
1119 
1120   PetscFunctionBegin;
1121   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1122   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1123   nz   = bs2*(A->nz + B->nz);
1124   rlen = mat->rmap->n;
1125   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1126   if (!rank) {
1127     header[0] = MAT_FILE_CLASSID;
1128     header[1] = mat->rmap->N;
1129     header[2] = mat->cmap->N;
1130 
1131     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1132     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1133     /* get largest number of rows any processor has */
1134     range = mat->rmap->range;
1135     for (i=1; i<size; i++) {
1136       rlen = PetscMax(rlen,range[i+1] - range[i]);
1137     }
1138   } else {
1139     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1140   }
1141 
1142   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1143   /* compute lengths of each row  */
1144   for (i=0; i<a->mbs; i++) {
1145     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1146   }
1147   /* store the row lengths to the file */
1148   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1149   if (!rank) {
1150     MPI_Status status;
1151     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1152     rlen = (range[1] - range[0])/bs;
1153     for (i=0; i<rlen; i++) {
1154       for (j=0; j<bs; j++) {
1155         row_lens[i*bs+j] = bs*crow_lens[i];
1156       }
1157     }
1158     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1159     for (i=1; i<size; i++) {
1160       rlen = (range[i+1] - range[i])/bs;
1161       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1162       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1163       for (k=0; k<rlen; k++) {
1164         for (j=0; j<bs; j++) {
1165           row_lens[k*bs+j] = bs*crow_lens[k];
1166         }
1167       }
1168       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1169     }
1170     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1171     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1172   } else {
1173     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1174     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1176   }
1177   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1178 
1179   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1180      information needed to make it for each row from a block row. This does require more communication but still not more than
1181      the communication needed for the nonzero values  */
1182   nzmax = nz; /*  space a largest processor needs */
1183   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1184   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1185   cnt   = 0;
1186   for (i=0; i<a->mbs; i++) {
1187     pcnt = cnt;
1188     for (j=B->i[i]; j<B->i[i+1]; j++) {
1189       if ((col = garray[B->j[j]]) > cstart) break;
1190       for (l=0; l<bs; l++) {
1191         column_indices[cnt++] = bs*col+l;
1192       }
1193     }
1194     for (k=A->i[i]; k<A->i[i+1]; k++) {
1195       for (l=0; l<bs; l++) {
1196         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1197       }
1198     }
1199     for (; j<B->i[i+1]; j++) {
1200       for (l=0; l<bs; l++) {
1201         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1202       }
1203     }
1204     len = cnt - pcnt;
1205     for (k=1; k<bs; k++) {
1206       ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr);
1207       cnt += len;
1208     }
1209   }
1210   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1211 
1212   /* store the columns to the file */
1213   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1214   if (!rank) {
1215     MPI_Status status;
1216     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1217     for (i=1; i<size; i++) {
1218       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1219       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1220       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1221       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     }
1223     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1224   } else {
1225     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1226     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1229   }
1230   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1231 
1232   /* load up the numerical values */
1233   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1234   cnt  = 0;
1235   for (i=0; i<a->mbs; i++) {
1236     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1237     for (j=B->i[i]; j<B->i[i+1]; j++) {
1238       if (garray[B->j[j]] > cstart) break;
1239       for (l=0; l<bs; l++) {
1240         for (ll=0; ll<bs; ll++) {
1241           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1242         }
1243       }
1244       cnt += bs;
1245     }
1246     for (k=A->i[i]; k<A->i[i+1]; k++) {
1247       for (l=0; l<bs; l++) {
1248         for (ll=0; ll<bs; ll++) {
1249           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1250         }
1251       }
1252       cnt += bs;
1253     }
1254     for (; j<B->i[i+1]; j++) {
1255       for (l=0; l<bs; l++) {
1256         for (ll=0; ll<bs; ll++) {
1257           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1258         }
1259       }
1260       cnt += bs;
1261     }
1262     cnt += (bs-1)*rlen;
1263   }
1264   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1265 
1266   /* store the column values to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1275       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1276     }
1277     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1278   } else {
1279     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1280     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1283   }
1284   ierr = PetscFree(column_values);CHKERRQ(ierr);
1285 
1286   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1287   if (file) {
1288     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1289   }
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1294 {
1295   PetscErrorCode ierr;
1296   PetscBool      iascii,isdraw,issocket,isbinary;
1297 
1298   PetscFunctionBegin;
1299   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1300   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1301   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1302   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1303   if (iascii || isdraw || issocket) {
1304     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1305   } else if (isbinary) {
1306     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1307   }
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1312 {
1313   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317 #if defined(PETSC_USE_LOG)
1318   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1319 #endif
1320   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1321   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1322   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1323   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1324 #if defined(PETSC_USE_CTABLE)
1325   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1326 #else
1327   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1328 #endif
1329   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1330   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1331   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1332   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1333   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1334   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1335   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1336   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1337 
1338   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1339   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1340   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1341   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1342   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1343   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1344   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1345   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1346   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1347 #if defined(PETSC_HAVE_HYPRE)
1348   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
1349 #endif
1350   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
1351   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1356 {
1357   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1358   PetscErrorCode ierr;
1359   PetscInt       nt;
1360 
1361   PetscFunctionBegin;
1362   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1363   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1364   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1365   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1366   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1367   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1368   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1369   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374 {
1375   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1380   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1381   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1382   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1387 {
1388   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1389   PetscErrorCode ierr;
1390 
1391   PetscFunctionBegin;
1392   /* do nondiagonal part */
1393   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1394   /* do local part */
1395   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1396   /* add partial results together */
1397   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1398   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1403 {
1404   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1405   PetscErrorCode ierr;
1406 
1407   PetscFunctionBegin;
1408   /* do nondiagonal part */
1409   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1410   /* do local part */
1411   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1412   /* add partial results together */
1413   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1414   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 /*
1419   This only works correctly for square matrices where the subblock A->A is the
1420    diagonal block
1421 */
1422 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1423 {
1424   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1429   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1434 {
1435   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1436   PetscErrorCode ierr;
1437 
1438   PetscFunctionBegin;
1439   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1440   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1445 {
1446   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1447   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1448   PetscErrorCode ierr;
1449   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1450   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1451   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1452 
1453   PetscFunctionBegin;
1454   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1455   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1456   mat->getrowactive = PETSC_TRUE;
1457 
1458   if (!mat->rowvalues && (idx || v)) {
1459     /*
1460         allocate enough space to hold information from the longest row.
1461     */
1462     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1463     PetscInt    max = 1,mbs = mat->mbs,tmp;
1464     for (i=0; i<mbs; i++) {
1465       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1466       if (max < tmp) max = tmp;
1467     }
1468     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1469   }
1470   lrow = row - brstart;
1471 
1472   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1473   if (!v)   {pvA = 0; pvB = 0;}
1474   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1475   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1476   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1477   nztot = nzA + nzB;
1478 
1479   cmap = mat->garray;
1480   if (v  || idx) {
1481     if (nztot) {
1482       /* Sort by increasing column numbers, assuming A and B already sorted */
1483       PetscInt imark = -1;
1484       if (v) {
1485         *v = v_p = mat->rowvalues;
1486         for (i=0; i<nzB; i++) {
1487           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1488           else break;
1489         }
1490         imark = i;
1491         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1492         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1493       }
1494       if (idx) {
1495         *idx = idx_p = mat->rowindices;
1496         if (imark > -1) {
1497           for (i=0; i<imark; i++) {
1498             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1499           }
1500         } else {
1501           for (i=0; i<nzB; i++) {
1502             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1503             else break;
1504           }
1505           imark = i;
1506         }
1507         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1508         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1509       }
1510     } else {
1511       if (idx) *idx = 0;
1512       if (v)   *v   = 0;
1513     }
1514   }
1515   *nz  = nztot;
1516   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1517   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1522 {
1523   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1524 
1525   PetscFunctionBegin;
1526   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1527   baij->getrowactive = PETSC_FALSE;
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1532 {
1533   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1538   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1543 {
1544   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1545   Mat            A  = a->A,B = a->B;
1546   PetscErrorCode ierr;
1547   PetscLogDouble isend[5],irecv[5];
1548 
1549   PetscFunctionBegin;
1550   info->block_size = (PetscReal)matin->rmap->bs;
1551 
1552   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1553 
1554   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1555   isend[3] = info->memory;  isend[4] = info->mallocs;
1556 
1557   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1558 
1559   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1560   isend[3] += info->memory;  isend[4] += info->mallocs;
1561 
1562   if (flag == MAT_LOCAL) {
1563     info->nz_used      = isend[0];
1564     info->nz_allocated = isend[1];
1565     info->nz_unneeded  = isend[2];
1566     info->memory       = isend[3];
1567     info->mallocs      = isend[4];
1568   } else if (flag == MAT_GLOBAL_MAX) {
1569     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1570 
1571     info->nz_used      = irecv[0];
1572     info->nz_allocated = irecv[1];
1573     info->nz_unneeded  = irecv[2];
1574     info->memory       = irecv[3];
1575     info->mallocs      = irecv[4];
1576   } else if (flag == MAT_GLOBAL_SUM) {
1577     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1578 
1579     info->nz_used      = irecv[0];
1580     info->nz_allocated = irecv[1];
1581     info->nz_unneeded  = irecv[2];
1582     info->memory       = irecv[3];
1583     info->mallocs      = irecv[4];
1584   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1585   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1586   info->fill_ratio_needed = 0;
1587   info->factor_mallocs    = 0;
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1592 {
1593   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1594   PetscErrorCode ierr;
1595 
1596   PetscFunctionBegin;
1597   switch (op) {
1598   case MAT_NEW_NONZERO_LOCATIONS:
1599   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1600   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1601   case MAT_KEEP_NONZERO_PATTERN:
1602   case MAT_NEW_NONZERO_LOCATION_ERR:
1603     MatCheckPreallocated(A,1);
1604     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1605     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1606     break;
1607   case MAT_ROW_ORIENTED:
1608     MatCheckPreallocated(A,1);
1609     a->roworiented = flg;
1610 
1611     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1612     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1613     break;
1614   case MAT_NEW_DIAGONALS:
1615   case MAT_SORTED_FULL:
1616     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1617     break;
1618   case MAT_IGNORE_OFF_PROC_ENTRIES:
1619     a->donotstash = flg;
1620     break;
1621   case MAT_USE_HASH_TABLE:
1622     a->ht_flag = flg;
1623     a->ht_fact = 1.39;
1624     break;
1625   case MAT_SYMMETRIC:
1626   case MAT_STRUCTURALLY_SYMMETRIC:
1627   case MAT_HERMITIAN:
1628   case MAT_SUBMAT_SINGLEIS:
1629   case MAT_SYMMETRY_ETERNAL:
1630     MatCheckPreallocated(A,1);
1631     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1632     break;
1633   default:
1634     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1635   }
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1640 {
1641   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1642   Mat_SeqBAIJ    *Aloc;
1643   Mat            B;
1644   PetscErrorCode ierr;
1645   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1646   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1647   MatScalar      *a;
1648 
1649   PetscFunctionBegin;
1650   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1651     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1652     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1653     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1654     /* Do not know preallocation information, but must set block size */
1655     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1656   } else {
1657     B = *matout;
1658   }
1659 
1660   /* copy over the A part */
1661   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1662   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1663   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1664 
1665   for (i=0; i<mbs; i++) {
1666     rvals[0] = bs*(baij->rstartbs + i);
1667     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1668     for (j=ai[i]; j<ai[i+1]; j++) {
1669       col = (baij->cstartbs+aj[j])*bs;
1670       for (k=0; k<bs; k++) {
1671         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1672 
1673         col++; a += bs;
1674       }
1675     }
1676   }
1677   /* copy over the B part */
1678   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1679   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1680   for (i=0; i<mbs; i++) {
1681     rvals[0] = bs*(baij->rstartbs + i);
1682     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1683     for (j=ai[i]; j<ai[i+1]; j++) {
1684       col = baij->garray[aj[j]]*bs;
1685       for (k=0; k<bs; k++) {
1686         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1687         col++;
1688         a += bs;
1689       }
1690     }
1691   }
1692   ierr = PetscFree(rvals);CHKERRQ(ierr);
1693   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695 
1696   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1697   else {
1698     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1699   }
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1704 {
1705   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1706   Mat            a     = baij->A,b = baij->B;
1707   PetscErrorCode ierr;
1708   PetscInt       s1,s2,s3;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1712   if (rr) {
1713     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1714     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1715     /* Overlap communication with computation. */
1716     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1717   }
1718   if (ll) {
1719     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1720     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1721     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1722   }
1723   /* scale  the diagonal block */
1724   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1725 
1726   if (rr) {
1727     /* Do a scatter end and then right scale the off-diagonal block */
1728     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1729     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1730   }
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1735 {
1736   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1737   PetscInt      *lrows;
1738   PetscInt       r, len;
1739   PetscBool      cong;
1740   PetscErrorCode ierr;
1741 
1742   PetscFunctionBegin;
1743   /* get locally owned rows */
1744   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1745   /* fix right hand side if needed */
1746   if (x && b) {
1747     const PetscScalar *xx;
1748     PetscScalar       *bb;
1749 
1750     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1751     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1752     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1753     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1754     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1755   }
1756 
1757   /* actually zap the local rows */
1758   /*
1759         Zero the required rows. If the "diagonal block" of the matrix
1760      is square and the user wishes to set the diagonal we use separate
1761      code so that MatSetValues() is not called for each diagonal allocating
1762      new memory, thus calling lots of mallocs and slowing things down.
1763 
1764   */
1765   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1766   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1767   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1768   if ((diag != 0.0) && cong) {
1769     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1770   } else if (diag != 0.0) {
1771     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1772     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1773        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1774     for (r = 0; r < len; ++r) {
1775       const PetscInt row = lrows[r] + A->rmap->rstart;
1776       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1777     }
1778     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1779     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1780   } else {
1781     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1782   }
1783   ierr = PetscFree(lrows);CHKERRQ(ierr);
1784 
1785   /* only change matrix nonzero state if pattern was allowed to be changed */
1786   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1787     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1788     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1789   }
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1794 {
1795   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1796   PetscErrorCode    ierr;
1797   PetscMPIInt       n = A->rmap->n;
1798   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1799   PetscInt          *lrows,*owners = A->rmap->range;
1800   PetscSFNode       *rrows;
1801   PetscSF           sf;
1802   const PetscScalar *xx;
1803   PetscScalar       *bb,*mask;
1804   Vec               xmask,lmask;
1805   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1806   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1807   PetscScalar       *aa;
1808 
1809   PetscFunctionBegin;
1810   /* Create SF where leaves are input rows and roots are owned rows */
1811   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1812   for (r = 0; r < n; ++r) lrows[r] = -1;
1813   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1814   for (r = 0; r < N; ++r) {
1815     const PetscInt idx   = rows[r];
1816     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1817     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1818       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1819     }
1820     rrows[r].rank  = p;
1821     rrows[r].index = rows[r] - owners[p];
1822   }
1823   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1824   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1825   /* Collect flags for rows to be zeroed */
1826   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1827   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1828   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1829   /* Compress and put in row numbers */
1830   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1831   /* zero diagonal part of matrix */
1832   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1833   /* handle off diagonal part of matrix */
1834   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1835   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1836   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1837   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1838   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1839   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1840   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1841   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1842   if (x) {
1843     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1844     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1845     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1846     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1847   }
1848   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1849   /* remove zeroed rows of off diagonal matrix */
1850   for (i = 0; i < len; ++i) {
1851     row   = lrows[i];
1852     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1853     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1854     for (k = 0; k < count; ++k) {
1855       aa[0] = 0.0;
1856       aa   += bs;
1857     }
1858   }
1859   /* loop over all elements of off process part of matrix zeroing removed columns*/
1860   for (i = 0; i < l->B->rmap->N; ++i) {
1861     row = i/bs;
1862     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1863       for (k = 0; k < bs; ++k) {
1864         col = bs*baij->j[j] + k;
1865         if (PetscAbsScalar(mask[col])) {
1866           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1867           if (x) bb[i] -= aa[0]*xx[col];
1868           aa[0] = 0.0;
1869         }
1870       }
1871     }
1872   }
1873   if (x) {
1874     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1875     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1876   }
1877   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1878   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1879   ierr = PetscFree(lrows);CHKERRQ(ierr);
1880 
1881   /* only change matrix nonzero state if pattern was allowed to be changed */
1882   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1883     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1884     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1885   }
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1890 {
1891   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1892   PetscErrorCode ierr;
1893 
1894   PetscFunctionBegin;
1895   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1900 
1901 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1902 {
1903   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1904   Mat            a,b,c,d;
1905   PetscBool      flg;
1906   PetscErrorCode ierr;
1907 
1908   PetscFunctionBegin;
1909   a = matA->A; b = matA->B;
1910   c = matB->A; d = matB->B;
1911 
1912   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1913   if (flg) {
1914     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1915   }
1916   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1921 {
1922   PetscErrorCode ierr;
1923   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1924   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1925 
1926   PetscFunctionBegin;
1927   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1928   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1929     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1930   } else {
1931     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1932     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1933   }
1934   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1939 {
1940   PetscErrorCode ierr;
1941 
1942   PetscFunctionBegin;
1943   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1948 {
1949   PetscErrorCode ierr;
1950   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1951   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1952   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1953 
1954   PetscFunctionBegin;
1955   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1960 {
1961   PetscErrorCode ierr;
1962   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1963   PetscBLASInt   bnz,one=1;
1964   Mat_SeqBAIJ    *x,*y;
1965   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
1966 
1967   PetscFunctionBegin;
1968   if (str == SAME_NONZERO_PATTERN) {
1969     PetscScalar alpha = a;
1970     x    = (Mat_SeqBAIJ*)xx->A->data;
1971     y    = (Mat_SeqBAIJ*)yy->A->data;
1972     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1973     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1974     x    = (Mat_SeqBAIJ*)xx->B->data;
1975     y    = (Mat_SeqBAIJ*)yy->B->data;
1976     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1977     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1978     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1979   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1980     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1981   } else {
1982     Mat      B;
1983     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1984     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1985     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1986     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1987     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1988     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1989     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1990     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
1991     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1992     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1993     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1994     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1995     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1996     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1997     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1998     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2004 {
2005   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2006   PetscErrorCode ierr;
2007 
2008   PetscFunctionBegin;
2009   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2010   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2015 {
2016   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2017   PetscErrorCode ierr;
2018 
2019   PetscFunctionBegin;
2020   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2021   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2026 {
2027   PetscErrorCode ierr;
2028   IS             iscol_local;
2029   PetscInt       csize;
2030 
2031   PetscFunctionBegin;
2032   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2033   if (call == MAT_REUSE_MATRIX) {
2034     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2035     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2036   } else {
2037     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2038   }
2039   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2040   if (call == MAT_INITIAL_MATRIX) {
2041     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2042     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2043   }
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 /*
2048   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2049   in local and then by concatenating the local matrices the end result.
2050   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
2051   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2052 */
2053 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2054 {
2055   PetscErrorCode ierr;
2056   PetscMPIInt    rank,size;
2057   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2058   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2059   Mat            M,Mreuse;
2060   MatScalar      *vwork,*aa;
2061   MPI_Comm       comm;
2062   IS             isrow_new, iscol_new;
2063   Mat_SeqBAIJ    *aij;
2064 
2065   PetscFunctionBegin;
2066   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2067   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2068   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2069   /* The compression and expansion should be avoided. Doesn't point
2070      out errors, might change the indices, hence buggey */
2071   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2072   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2073 
2074   if (call ==  MAT_REUSE_MATRIX) {
2075     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2076     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2077     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
2078   } else {
2079     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
2080   }
2081   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2082   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2083   /*
2084       m - number of local rows
2085       n - number of columns (same on all processors)
2086       rstart - first row in new global matrix generated
2087   */
2088   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2089   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2090   m    = m/bs;
2091   n    = n/bs;
2092 
2093   if (call == MAT_INITIAL_MATRIX) {
2094     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2095     ii  = aij->i;
2096     jj  = aij->j;
2097 
2098     /*
2099         Determine the number of non-zeros in the diagonal and off-diagonal
2100         portions of the matrix in order to do correct preallocation
2101     */
2102 
2103     /* first get start and end of "diagonal" columns */
2104     if (csize == PETSC_DECIDE) {
2105       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2106       if (mglobal == n*bs) { /* square matrix */
2107         nlocal = m;
2108       } else {
2109         nlocal = n/size + ((n % size) > rank);
2110       }
2111     } else {
2112       nlocal = csize/bs;
2113     }
2114     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2115     rstart = rend - nlocal;
2116     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2117 
2118     /* next, compute all the lengths */
2119     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2120     for (i=0; i<m; i++) {
2121       jend = ii[i+1] - ii[i];
2122       olen = 0;
2123       dlen = 0;
2124       for (j=0; j<jend; j++) {
2125         if (*jj < rstart || *jj >= rend) olen++;
2126         else dlen++;
2127         jj++;
2128       }
2129       olens[i] = olen;
2130       dlens[i] = dlen;
2131     }
2132     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2133     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2134     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2135     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2136     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2137     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2138   } else {
2139     PetscInt ml,nl;
2140 
2141     M    = *newmat;
2142     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2143     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2144     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2145     /*
2146          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2147        rather than the slower MatSetValues().
2148     */
2149     M->was_assembled = PETSC_TRUE;
2150     M->assembled     = PETSC_FALSE;
2151   }
2152   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2153   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2154   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2155   ii   = aij->i;
2156   jj   = aij->j;
2157   aa   = aij->a;
2158   for (i=0; i<m; i++) {
2159     row   = rstart/bs + i;
2160     nz    = ii[i+1] - ii[i];
2161     cwork = jj;     jj += nz;
2162     vwork = aa;     aa += nz*bs*bs;
2163     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2164   }
2165 
2166   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2167   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2168   *newmat = M;
2169 
2170   /* save submatrix used in processor for next request */
2171   if (call ==  MAT_INITIAL_MATRIX) {
2172     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2173     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2174   }
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2179 {
2180   MPI_Comm       comm,pcomm;
2181   PetscInt       clocal_size,nrows;
2182   const PetscInt *rows;
2183   PetscMPIInt    size;
2184   IS             crowp,lcolp;
2185   PetscErrorCode ierr;
2186 
2187   PetscFunctionBegin;
2188   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2189   /* make a collective version of 'rowp' */
2190   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2191   if (pcomm==comm) {
2192     crowp = rowp;
2193   } else {
2194     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2195     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2196     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2197     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2198   }
2199   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2200   /* make a local version of 'colp' */
2201   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2202   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2203   if (size==1) {
2204     lcolp = colp;
2205   } else {
2206     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2207   }
2208   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2209   /* now we just get the submatrix */
2210   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2211   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2212   /* clean up */
2213   if (pcomm!=comm) {
2214     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2215   }
2216   if (size>1) {
2217     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2218   }
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2223 {
2224   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2225   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2226 
2227   PetscFunctionBegin;
2228   if (nghosts) *nghosts = B->nbs;
2229   if (ghosts) *ghosts = baij->garray;
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2234 {
2235   Mat            B;
2236   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2237   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2238   Mat_SeqAIJ     *b;
2239   PetscErrorCode ierr;
2240   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2241   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2242   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2243 
2244   PetscFunctionBegin;
2245   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2246   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2247 
2248   /* ----------------------------------------------------------------
2249      Tell every processor the number of nonzeros per row
2250   */
2251   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2252   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2253     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2254   }
2255   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2256   displs    = recvcounts + size;
2257   for (i=0; i<size; i++) {
2258     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2259     displs[i]     = A->rmap->range[i]/bs;
2260   }
2261 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2262   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2263 #else
2264   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2265   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2266 #endif
2267   /* ---------------------------------------------------------------
2268      Create the sequential matrix of the same type as the local block diagonal
2269   */
2270   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2271   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2272   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2273   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2274   b    = (Mat_SeqAIJ*)B->data;
2275 
2276   /*--------------------------------------------------------------------
2277     Copy my part of matrix column indices over
2278   */
2279   sendcount  = ad->nz + bd->nz;
2280   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2281   a_jsendbuf = ad->j;
2282   b_jsendbuf = bd->j;
2283   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2284   cnt        = 0;
2285   for (i=0; i<n; i++) {
2286 
2287     /* put in lower diagonal portion */
2288     m = bd->i[i+1] - bd->i[i];
2289     while (m > 0) {
2290       /* is it above diagonal (in bd (compressed) numbering) */
2291       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2292       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2293       m--;
2294     }
2295 
2296     /* put in diagonal portion */
2297     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2298       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2299     }
2300 
2301     /* put in upper diagonal portion */
2302     while (m-- > 0) {
2303       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2304     }
2305   }
2306   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2307 
2308   /*--------------------------------------------------------------------
2309     Gather all column indices to all processors
2310   */
2311   for (i=0; i<size; i++) {
2312     recvcounts[i] = 0;
2313     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2314       recvcounts[i] += lens[j];
2315     }
2316   }
2317   displs[0] = 0;
2318   for (i=1; i<size; i++) {
2319     displs[i] = displs[i-1] + recvcounts[i-1];
2320   }
2321 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2322   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2323 #else
2324   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2325 #endif
2326   /*--------------------------------------------------------------------
2327     Assemble the matrix into useable form (note numerical values not yet set)
2328   */
2329   /* set the b->ilen (length of each row) values */
2330   ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
2331   /* set the b->i indices */
2332   b->i[0] = 0;
2333   for (i=1; i<=A->rmap->N/bs; i++) {
2334     b->i[i] = b->i[i-1] + lens[i-1];
2335   }
2336   ierr = PetscFree(lens);CHKERRQ(ierr);
2337   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2338   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2340 
2341   if (A->symmetric) {
2342     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2343   } else if (A->hermitian) {
2344     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2345   } else if (A->structurally_symmetric) {
2346     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2347   }
2348   *newmat = B;
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2353 {
2354   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2355   PetscErrorCode ierr;
2356   Vec            bb1 = 0;
2357 
2358   PetscFunctionBegin;
2359   if (flag == SOR_APPLY_UPPER) {
2360     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2361     PetscFunctionReturn(0);
2362   }
2363 
2364   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2365     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2366   }
2367 
2368   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2369     if (flag & SOR_ZERO_INITIAL_GUESS) {
2370       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2371       its--;
2372     }
2373 
2374     while (its--) {
2375       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2376       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2377 
2378       /* update rhs: bb1 = bb - B*x */
2379       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2380       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2381 
2382       /* local sweep */
2383       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2384     }
2385   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2386     if (flag & SOR_ZERO_INITIAL_GUESS) {
2387       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2388       its--;
2389     }
2390     while (its--) {
2391       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2392       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2393 
2394       /* update rhs: bb1 = bb - B*x */
2395       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2396       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2397 
2398       /* local sweep */
2399       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2400     }
2401   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2402     if (flag & SOR_ZERO_INITIAL_GUESS) {
2403       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2404       its--;
2405     }
2406     while (its--) {
2407       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2408       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2409 
2410       /* update rhs: bb1 = bb - B*x */
2411       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2412       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2413 
2414       /* local sweep */
2415       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2416     }
2417   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2418 
2419   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2424 {
2425   PetscErrorCode ierr;
2426   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2427   PetscInt       N,i,*garray = aij->garray;
2428   PetscInt       ib,jb,bs = A->rmap->bs;
2429   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2430   MatScalar      *a_val = a_aij->a;
2431   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2432   MatScalar      *b_val = b_aij->a;
2433   PetscReal      *work;
2434 
2435   PetscFunctionBegin;
2436   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2437   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2438   if (type == NORM_2) {
2439     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2440       for (jb=0; jb<bs; jb++) {
2441         for (ib=0; ib<bs; ib++) {
2442           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2443           a_val++;
2444         }
2445       }
2446     }
2447     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2448       for (jb=0; jb<bs; jb++) {
2449         for (ib=0; ib<bs; ib++) {
2450           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2451           b_val++;
2452         }
2453       }
2454     }
2455   } else if (type == NORM_1) {
2456     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2457       for (jb=0; jb<bs; jb++) {
2458         for (ib=0; ib<bs; ib++) {
2459           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2460           a_val++;
2461         }
2462       }
2463     }
2464     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2465       for (jb=0; jb<bs; jb++) {
2466        for (ib=0; ib<bs; ib++) {
2467           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2468           b_val++;
2469         }
2470       }
2471     }
2472   } else if (type == NORM_INFINITY) {
2473     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2474       for (jb=0; jb<bs; jb++) {
2475         for (ib=0; ib<bs; ib++) {
2476           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2477           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2478           a_val++;
2479         }
2480       }
2481     }
2482     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2483       for (jb=0; jb<bs; jb++) {
2484         for (ib=0; ib<bs; ib++) {
2485           int col = garray[b_aij->j[i]] * bs + jb;
2486           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2487           b_val++;
2488         }
2489       }
2490     }
2491   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2492   if (type == NORM_INFINITY) {
2493     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2494   } else {
2495     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2496   }
2497   ierr = PetscFree(work);CHKERRQ(ierr);
2498   if (type == NORM_2) {
2499     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2500   }
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2505 {
2506   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2507   PetscErrorCode ierr;
2508 
2509   PetscFunctionBegin;
2510   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2511   A->factorerrortype             = a->A->factorerrortype;
2512   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2513   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2518 {
2519   PetscErrorCode ierr;
2520   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2521   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2522 
2523   PetscFunctionBegin;
2524   if (!Y->preallocated) {
2525     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2526   } else if (!aij->nz) {
2527     PetscInt nonew = aij->nonew;
2528     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2529     aij->nonew = nonew;
2530   }
2531   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2536 {
2537   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2542   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2543   if (d) {
2544     PetscInt rstart;
2545     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2546     *d += rstart/A->rmap->bs;
2547 
2548   }
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2553 {
2554   PetscFunctionBegin;
2555   *a = ((Mat_MPIBAIJ*)A->data)->A;
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 /* -------------------------------------------------------------------*/
2560 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2561                                        MatGetRow_MPIBAIJ,
2562                                        MatRestoreRow_MPIBAIJ,
2563                                        MatMult_MPIBAIJ,
2564                                 /* 4*/ MatMultAdd_MPIBAIJ,
2565                                        MatMultTranspose_MPIBAIJ,
2566                                        MatMultTransposeAdd_MPIBAIJ,
2567                                        0,
2568                                        0,
2569                                        0,
2570                                 /*10*/ 0,
2571                                        0,
2572                                        0,
2573                                        MatSOR_MPIBAIJ,
2574                                        MatTranspose_MPIBAIJ,
2575                                 /*15*/ MatGetInfo_MPIBAIJ,
2576                                        MatEqual_MPIBAIJ,
2577                                        MatGetDiagonal_MPIBAIJ,
2578                                        MatDiagonalScale_MPIBAIJ,
2579                                        MatNorm_MPIBAIJ,
2580                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2581                                        MatAssemblyEnd_MPIBAIJ,
2582                                        MatSetOption_MPIBAIJ,
2583                                        MatZeroEntries_MPIBAIJ,
2584                                 /*24*/ MatZeroRows_MPIBAIJ,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*29*/ MatSetUp_MPIBAIJ,
2590                                        0,
2591                                        0,
2592                                        MatGetDiagonalBlock_MPIBAIJ,
2593                                        0,
2594                                 /*34*/ MatDuplicate_MPIBAIJ,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                 /*39*/ MatAXPY_MPIBAIJ,
2600                                        MatCreateSubMatrices_MPIBAIJ,
2601                                        MatIncreaseOverlap_MPIBAIJ,
2602                                        MatGetValues_MPIBAIJ,
2603                                        MatCopy_MPIBAIJ,
2604                                 /*44*/ 0,
2605                                        MatScale_MPIBAIJ,
2606                                        MatShift_MPIBAIJ,
2607                                        0,
2608                                        MatZeroRowsColumns_MPIBAIJ,
2609                                 /*49*/ 0,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2615                                        0,
2616                                        MatSetUnfactored_MPIBAIJ,
2617                                        MatPermute_MPIBAIJ,
2618                                        MatSetValuesBlocked_MPIBAIJ,
2619                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2620                                        MatDestroy_MPIBAIJ,
2621                                        MatView_MPIBAIJ,
2622                                        0,
2623                                        0,
2624                                 /*64*/ 0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                 /*74*/ 0,
2635                                        MatFDColoringApply_BAIJ,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                 /*79*/ 0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                        MatLoad_MPIBAIJ,
2644                                 /*84*/ 0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                 /*89*/ 0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*94*/ 0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                 /*99*/ 0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                 /*104*/0,
2665                                        MatRealPart_MPIBAIJ,
2666                                        MatImaginaryPart_MPIBAIJ,
2667                                        0,
2668                                        0,
2669                                 /*109*/0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                        MatMissingDiagonal_MPIBAIJ,
2674                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2675                                        0,
2676                                        MatGetGhosts_MPIBAIJ,
2677                                        0,
2678                                        0,
2679                                 /*119*/0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        MatGetMultiProcBlock_MPIBAIJ,
2684                                 /*124*/0,
2685                                        MatGetColumnNorms_MPIBAIJ,
2686                                        MatInvertBlockDiagonal_MPIBAIJ,
2687                                        0,
2688                                        0,
2689                                /*129*/ 0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                /*134*/ 0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                /*139*/ MatSetBlockSizes_Default,
2700                                        0,
2701                                        0,
2702                                        MatFDColoringSetUp_MPIXAIJ,
2703                                        0,
2704                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2705 };
2706 
2707 
2708 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2709 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2710 
2711 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2712 {
2713   PetscInt       m,rstart,cstart,cend;
2714   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2715   const PetscInt *JJ    =0;
2716   PetscScalar    *values=0;
2717   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2718   PetscErrorCode ierr;
2719   PetscBool      nooffprocentries;
2720 
2721   PetscFunctionBegin;
2722   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2723   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2724   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2725   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2726   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2727   m      = B->rmap->n/bs;
2728   rstart = B->rmap->rstart/bs;
2729   cstart = B->cmap->rstart/bs;
2730   cend   = B->cmap->rend/bs;
2731 
2732   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2733   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2734   for (i=0; i<m; i++) {
2735     nz = ii[i+1] - ii[i];
2736     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2737     nz_max = PetscMax(nz_max,nz);
2738     dlen   = 0;
2739     olen   = 0;
2740     JJ     = jj + ii[i];
2741     for (j=0; j<nz; j++) {
2742       if (*JJ < cstart || *JJ >= cend) olen++;
2743       else dlen++;
2744       JJ++;
2745     }
2746     d_nnz[i] = dlen;
2747     o_nnz[i] = olen;
2748   }
2749   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2750   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2751 
2752   values = (PetscScalar*)V;
2753   if (!values) {
2754     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2755   }
2756   for (i=0; i<m; i++) {
2757     PetscInt          row    = i + rstart;
2758     PetscInt          ncols  = ii[i+1] - ii[i];
2759     const PetscInt    *icols = jj + ii[i];
2760     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2761       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2762       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2763     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2764       PetscInt j;
2765       for (j=0; j<ncols; j++) {
2766         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2767         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2768       }
2769     }
2770   }
2771 
2772   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2773   nooffprocentries    = B->nooffprocentries;
2774   B->nooffprocentries = PETSC_TRUE;
2775   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2776   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2777   B->nooffprocentries = nooffprocentries;
2778 
2779   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2780   PetscFunctionReturn(0);
2781 }
2782 
2783 /*@C
2784    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2785 
2786    Collective
2787 
2788    Input Parameters:
2789 +  B - the matrix
2790 .  bs - the block size
2791 .  i - the indices into j for the start of each local row (starts with zero)
2792 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2793 -  v - optional values in the matrix
2794 
2795    Level: advanced
2796 
2797    Notes:
2798     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2799    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2800    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2801    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2802    block column and the second index is over columns within a block.
2803 
2804    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2805 
2806 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2807 @*/
2808 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2809 {
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2814   PetscValidType(B,1);
2815   PetscValidLogicalCollectiveInt(B,bs,2);
2816   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2821 {
2822   Mat_MPIBAIJ    *b;
2823   PetscErrorCode ierr;
2824   PetscInt       i;
2825   PetscMPIInt    size;
2826 
2827   PetscFunctionBegin;
2828   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2829   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2830   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2831   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2832 
2833   if (d_nnz) {
2834     for (i=0; i<B->rmap->n/bs; i++) {
2835       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2836     }
2837   }
2838   if (o_nnz) {
2839     for (i=0; i<B->rmap->n/bs; i++) {
2840       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2841     }
2842   }
2843 
2844   b      = (Mat_MPIBAIJ*)B->data;
2845   b->bs2 = bs*bs;
2846   b->mbs = B->rmap->n/bs;
2847   b->nbs = B->cmap->n/bs;
2848   b->Mbs = B->rmap->N/bs;
2849   b->Nbs = B->cmap->N/bs;
2850 
2851   for (i=0; i<=b->size; i++) {
2852     b->rangebs[i] = B->rmap->range[i]/bs;
2853   }
2854   b->rstartbs = B->rmap->rstart/bs;
2855   b->rendbs   = B->rmap->rend/bs;
2856   b->cstartbs = B->cmap->rstart/bs;
2857   b->cendbs   = B->cmap->rend/bs;
2858 
2859 #if defined(PETSC_USE_CTABLE)
2860   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2861 #else
2862   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2863 #endif
2864   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2865   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2866   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2867 
2868   /* Because the B will have been resized we simply destroy it and create a new one each time */
2869   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2870   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2871   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2872   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2873   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2874   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2875 
2876   if (!B->preallocated) {
2877     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2878     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2879     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2880     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2881     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2882   }
2883 
2884   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2885   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2886   B->preallocated  = PETSC_TRUE;
2887   B->was_assembled = PETSC_FALSE;
2888   B->assembled     = PETSC_FALSE;
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2893 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2894 
2895 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2896 {
2897   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2898   PetscErrorCode ierr;
2899   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2900   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2901   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2902 
2903   PetscFunctionBegin;
2904   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
2905   ii[0] = 0;
2906   for (i=0; i<M; i++) {
2907     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2908     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2909     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2910     /* remove one from count of matrix has diagonal */
2911     for (j=id[i]; j<id[i+1]; j++) {
2912       if (jd[j] == i) {ii[i+1]--;break;}
2913     }
2914   }
2915   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2916   cnt  = 0;
2917   for (i=0; i<M; i++) {
2918     for (j=io[i]; j<io[i+1]; j++) {
2919       if (garray[jo[j]] > rstart) break;
2920       jj[cnt++] = garray[jo[j]];
2921     }
2922     for (k=id[i]; k<id[i+1]; k++) {
2923       if (jd[k] != i) {
2924         jj[cnt++] = rstart + jd[k];
2925       }
2926     }
2927     for (; j<io[i+1]; j++) {
2928       jj[cnt++] = garray[jo[j]];
2929     }
2930   }
2931   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2936 
2937 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2938 
2939 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2940 {
2941   PetscErrorCode ierr;
2942   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2943   Mat            B;
2944   Mat_MPIAIJ     *b;
2945 
2946   PetscFunctionBegin;
2947   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2948 
2949   if (reuse == MAT_REUSE_MATRIX) {
2950     B = *newmat;
2951   } else {
2952     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2953     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2954     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2955     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2956     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2957     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2958   }
2959   b = (Mat_MPIAIJ*) B->data;
2960 
2961   if (reuse == MAT_REUSE_MATRIX) {
2962     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
2963     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
2964   } else {
2965     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2966     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2967     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2968     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2969     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2970     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2971     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2972   }
2973   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2974   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2975 
2976   if (reuse == MAT_INPLACE_MATRIX) {
2977     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
2978   } else {
2979    *newmat = B;
2980   }
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 /*MC
2985    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2986 
2987    Options Database Keys:
2988 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2989 . -mat_block_size <bs> - set the blocksize used to store the matrix
2990 - -mat_use_hash_table <fact>
2991 
2992    Level: beginner
2993 
2994    Notes:
2995     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
2996     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2997 
2998 .seealso: MatCreateMPIBAIJ
2999 M*/
3000 
3001 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3002 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
3003 
3004 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3005 {
3006   Mat_MPIBAIJ    *b;
3007   PetscErrorCode ierr;
3008   PetscBool      flg = PETSC_FALSE;
3009 
3010   PetscFunctionBegin;
3011   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3012   B->data = (void*)b;
3013 
3014   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3015   B->assembled = PETSC_FALSE;
3016 
3017   B->insertmode = NOT_SET_VALUES;
3018   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3019   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3020 
3021   /* build local table of row and column ownerships */
3022   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
3023 
3024   /* build cache for off array entries formed */
3025   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3026 
3027   b->donotstash  = PETSC_FALSE;
3028   b->colmap      = NULL;
3029   b->garray      = NULL;
3030   b->roworiented = PETSC_TRUE;
3031 
3032   /* stuff used in block assembly */
3033   b->barray = 0;
3034 
3035   /* stuff used for matrix vector multiply */
3036   b->lvec  = 0;
3037   b->Mvctx = 0;
3038 
3039   /* stuff for MatGetRow() */
3040   b->rowindices   = 0;
3041   b->rowvalues    = 0;
3042   b->getrowactive = PETSC_FALSE;
3043 
3044   /* hash table stuff */
3045   b->ht           = 0;
3046   b->hd           = 0;
3047   b->ht_size      = 0;
3048   b->ht_flag      = PETSC_FALSE;
3049   b->ht_fact      = 0;
3050   b->ht_total_ct  = 0;
3051   b->ht_insert_ct = 0;
3052 
3053   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
3054   b->ijonly = PETSC_FALSE;
3055 
3056 
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3060 #if defined(PETSC_HAVE_HYPRE)
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3062 #endif
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3068   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3069   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3070   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3071   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3072 
3073   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3074   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
3075   if (flg) {
3076     PetscReal fact = 1.39;
3077     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3078     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3079     if (fact <= 1.0) fact = 1.39;
3080     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3081     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3082   }
3083   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3084   PetscFunctionReturn(0);
3085 }
3086 
3087 /*MC
3088    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3089 
3090    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3091    and MATMPIBAIJ otherwise.
3092 
3093    Options Database Keys:
3094 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3095 
3096   Level: beginner
3097 
3098 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3099 M*/
3100 
3101 /*@C
3102    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3103    (block compressed row).  For good matrix assembly performance
3104    the user should preallocate the matrix storage by setting the parameters
3105    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3106    performance can be increased by more than a factor of 50.
3107 
3108    Collective on Mat
3109 
3110    Input Parameters:
3111 +  B - the matrix
3112 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3113           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3114 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3115            submatrix  (same for all local rows)
3116 .  d_nnz - array containing the number of block nonzeros in the various block rows
3117            of the in diagonal portion of the local (possibly different for each block
3118            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3119            set it even if it is zero.
3120 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3121            submatrix (same for all local rows).
3122 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3123            off-diagonal portion of the local submatrix (possibly different for
3124            each block row) or NULL.
3125 
3126    If the *_nnz parameter is given then the *_nz parameter is ignored
3127 
3128    Options Database Keys:
3129 +   -mat_block_size - size of the blocks to use
3130 -   -mat_use_hash_table <fact>
3131 
3132    Notes:
3133    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3134    than it must be used on all processors that share the object for that argument.
3135 
3136    Storage Information:
3137    For a square global matrix we define each processor's diagonal portion
3138    to be its local rows and the corresponding columns (a square submatrix);
3139    each processor's off-diagonal portion encompasses the remainder of the
3140    local matrix (a rectangular submatrix).
3141 
3142    The user can specify preallocated storage for the diagonal part of
3143    the local submatrix with either d_nz or d_nnz (not both).  Set
3144    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3145    memory allocation.  Likewise, specify preallocated storage for the
3146    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3147 
3148    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3149    the figure below we depict these three local rows and all columns (0-11).
3150 
3151 .vb
3152            0 1 2 3 4 5 6 7 8 9 10 11
3153           --------------------------
3154    row 3  |o o o d d d o o o o  o  o
3155    row 4  |o o o d d d o o o o  o  o
3156    row 5  |o o o d d d o o o o  o  o
3157           --------------------------
3158 .ve
3159 
3160    Thus, any entries in the d locations are stored in the d (diagonal)
3161    submatrix, and any entries in the o locations are stored in the
3162    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3163    stored simply in the MATSEQBAIJ format for compressed row storage.
3164 
3165    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3166    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3167    In general, for PDE problems in which most nonzeros are near the diagonal,
3168    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3169    or you will get TERRIBLE performance; see the users' manual chapter on
3170    matrices.
3171 
3172    You can call MatGetInfo() to get information on how effective the preallocation was;
3173    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3174    You can also run with the option -info and look for messages with the string
3175    malloc in them to see if additional memory allocation was needed.
3176 
3177    Level: intermediate
3178 
3179 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3180 @*/
3181 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3182 {
3183   PetscErrorCode ierr;
3184 
3185   PetscFunctionBegin;
3186   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3187   PetscValidType(B,1);
3188   PetscValidLogicalCollectiveInt(B,bs,2);
3189   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3190   PetscFunctionReturn(0);
3191 }
3192 
3193 /*@C
3194    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3195    (block compressed row).  For good matrix assembly performance
3196    the user should preallocate the matrix storage by setting the parameters
3197    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3198    performance can be increased by more than a factor of 50.
3199 
3200    Collective
3201 
3202    Input Parameters:
3203 +  comm - MPI communicator
3204 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3205           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3206 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3207            This value should be the same as the local size used in creating the
3208            y vector for the matrix-vector product y = Ax.
3209 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3210            This value should be the same as the local size used in creating the
3211            x vector for the matrix-vector product y = Ax.
3212 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3213 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3214 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3215            submatrix  (same for all local rows)
3216 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3217            of the in diagonal portion of the local (possibly different for each block
3218            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3219            and set it even if it is zero.
3220 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3221            submatrix (same for all local rows).
3222 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3223            off-diagonal portion of the local submatrix (possibly different for
3224            each block row) or NULL.
3225 
3226    Output Parameter:
3227 .  A - the matrix
3228 
3229    Options Database Keys:
3230 +   -mat_block_size - size of the blocks to use
3231 -   -mat_use_hash_table <fact>
3232 
3233    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3234    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3235    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3236 
3237    Notes:
3238    If the *_nnz parameter is given then the *_nz parameter is ignored
3239 
3240    A nonzero block is any block that as 1 or more nonzeros in it
3241 
3242    The user MUST specify either the local or global matrix dimensions
3243    (possibly both).
3244 
3245    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3246    than it must be used on all processors that share the object for that argument.
3247 
3248    Storage Information:
3249    For a square global matrix we define each processor's diagonal portion
3250    to be its local rows and the corresponding columns (a square submatrix);
3251    each processor's off-diagonal portion encompasses the remainder of the
3252    local matrix (a rectangular submatrix).
3253 
3254    The user can specify preallocated storage for the diagonal part of
3255    the local submatrix with either d_nz or d_nnz (not both).  Set
3256    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3257    memory allocation.  Likewise, specify preallocated storage for the
3258    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3259 
3260    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3261    the figure below we depict these three local rows and all columns (0-11).
3262 
3263 .vb
3264            0 1 2 3 4 5 6 7 8 9 10 11
3265           --------------------------
3266    row 3  |o o o d d d o o o o  o  o
3267    row 4  |o o o d d d o o o o  o  o
3268    row 5  |o o o d d d o o o o  o  o
3269           --------------------------
3270 .ve
3271 
3272    Thus, any entries in the d locations are stored in the d (diagonal)
3273    submatrix, and any entries in the o locations are stored in the
3274    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3275    stored simply in the MATSEQBAIJ format for compressed row storage.
3276 
3277    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3278    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3279    In general, for PDE problems in which most nonzeros are near the diagonal,
3280    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3281    or you will get TERRIBLE performance; see the users' manual chapter on
3282    matrices.
3283 
3284    Level: intermediate
3285 
3286 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3287 @*/
3288 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3289 {
3290   PetscErrorCode ierr;
3291   PetscMPIInt    size;
3292 
3293   PetscFunctionBegin;
3294   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3295   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3296   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3297   if (size > 1) {
3298     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3299     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3300   } else {
3301     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3302     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3303   }
3304   PetscFunctionReturn(0);
3305 }
3306 
3307 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3308 {
3309   Mat            mat;
3310   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3311   PetscErrorCode ierr;
3312   PetscInt       len=0;
3313 
3314   PetscFunctionBegin;
3315   *newmat = 0;
3316   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3317   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3318   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3319 
3320   mat->factortype   = matin->factortype;
3321   mat->preallocated = PETSC_TRUE;
3322   mat->assembled    = PETSC_TRUE;
3323   mat->insertmode   = NOT_SET_VALUES;
3324 
3325   a             = (Mat_MPIBAIJ*)mat->data;
3326   mat->rmap->bs = matin->rmap->bs;
3327   a->bs2        = oldmat->bs2;
3328   a->mbs        = oldmat->mbs;
3329   a->nbs        = oldmat->nbs;
3330   a->Mbs        = oldmat->Mbs;
3331   a->Nbs        = oldmat->Nbs;
3332 
3333   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3334   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3335 
3336   a->size         = oldmat->size;
3337   a->rank         = oldmat->rank;
3338   a->donotstash   = oldmat->donotstash;
3339   a->roworiented  = oldmat->roworiented;
3340   a->rowindices   = 0;
3341   a->rowvalues    = 0;
3342   a->getrowactive = PETSC_FALSE;
3343   a->barray       = 0;
3344   a->rstartbs     = oldmat->rstartbs;
3345   a->rendbs       = oldmat->rendbs;
3346   a->cstartbs     = oldmat->cstartbs;
3347   a->cendbs       = oldmat->cendbs;
3348 
3349   /* hash table stuff */
3350   a->ht           = 0;
3351   a->hd           = 0;
3352   a->ht_size      = 0;
3353   a->ht_flag      = oldmat->ht_flag;
3354   a->ht_fact      = oldmat->ht_fact;
3355   a->ht_total_ct  = 0;
3356   a->ht_insert_ct = 0;
3357 
3358   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
3359   if (oldmat->colmap) {
3360 #if defined(PETSC_USE_CTABLE)
3361     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3362 #else
3363     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3364     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3365     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
3366 #endif
3367   } else a->colmap = 0;
3368 
3369   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3370     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3371     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3372     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
3373   } else a->garray = 0;
3374 
3375   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3376   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3377   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3378   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3379   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3380 
3381   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3382   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3383   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3384   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3385   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3386   *newmat = mat;
3387   PetscFunctionReturn(0);
3388 }
3389 
3390 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3391 {
3392   PetscErrorCode ierr;
3393   int            fd;
3394   PetscInt       i,nz,j,rstart,rend;
3395   PetscScalar    *vals,*buf;
3396   MPI_Comm       comm;
3397   MPI_Status     status;
3398   PetscMPIInt    rank,size,maxnz;
3399   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3400   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3401   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3402   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3403   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3404   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3405   PetscBool      isbinary;
3406 
3407   PetscFunctionBegin;
3408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3409   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3410 
3411   /* force binary viewer to load .info file if it has not yet done so */
3412   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3413   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3414   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3415   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3416   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3417   if (bs < 0) bs = 1;
3418 
3419   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3420   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3421   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3422   if (!rank) {
3423     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3424     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3425     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3426   }
3427   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3428   M    = header[1]; N = header[2];
3429 
3430   /* If global sizes are set, check if they are consistent with that given in the file */
3431   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
3432   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
3433 
3434   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3435 
3436   /*
3437      This code adds extra rows to make sure the number of rows is
3438      divisible by the blocksize
3439   */
3440   Mbs        = M/bs;
3441   extra_rows = bs - M + bs*Mbs;
3442   if (extra_rows == bs) extra_rows = 0;
3443   else                  Mbs++;
3444   if (extra_rows && !rank) {
3445     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3446   }
3447 
3448   /* determine ownership of all rows */
3449   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3450     mbs = Mbs/size + ((Mbs % size) > rank);
3451     m   = mbs*bs;
3452   } else { /* User set */
3453     m   = newmat->rmap->n;
3454     mbs = m/bs;
3455   }
3456   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3457   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3458 
3459   /* process 0 needs enough room for process with most rows */
3460   if (!rank) {
3461     mmax = rowners[1];
3462     for (i=2; i<=size; i++) {
3463       mmax = PetscMax(mmax,rowners[i]);
3464     }
3465     mmax*=bs;
3466   } else mmax = -1;             /* unused, but compiler warns anyway */
3467 
3468   rowners[0] = 0;
3469   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3470   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3471   rstart = rowners[rank];
3472   rend   = rowners[rank+1];
3473 
3474   /* distribute row lengths to all processors */
3475   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3476   if (!rank) {
3477     mend = m;
3478     if (size == 1) mend = mend - extra_rows;
3479     ierr = PetscBinaryRead(fd,locrowlens,mend,NULL,PETSC_INT);CHKERRQ(ierr);
3480     for (j=mend; j<m; j++) locrowlens[j] = 1;
3481     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3482     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
3483     procsnz[0] = 0;
3484     for (j=0; j<m; j++) {
3485       procsnz[0] += locrowlens[j];
3486     }
3487     for (i=1; i<size; i++) {
3488       mend = browners[i+1] - browners[i];
3489       if (i == size-1) mend = mend - extra_rows;
3490       ierr = PetscBinaryRead(fd,rowlengths,mend,NULL,PETSC_INT);CHKERRQ(ierr);
3491       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3492       /* calculate the number of nonzeros on each processor */
3493       procsnz[i] = 0;
3494       for (j=0; j<browners[i+1]-browners[i]; j++) {
3495         procsnz[i] += rowlengths[j];
3496       }
3497       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3498     }
3499     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3500   } else {
3501     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3502   }
3503 
3504   if (!rank) {
3505     /* determine max buffer needed and allocate it */
3506     maxnz = procsnz[0];
3507     for (i=1; i<size; i++) {
3508       maxnz = PetscMax(maxnz,procsnz[i]);
3509     }
3510     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3511 
3512     /* read in my part of the matrix column indices  */
3513     nz     = procsnz[0];
3514     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3515     mycols = ibuf;
3516     if (size == 1) nz -= extra_rows;
3517     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3518     if (size == 1) {
3519       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3520     }
3521 
3522     /* read in every ones (except the last) and ship off */
3523     for (i=1; i<size-1; i++) {
3524       nz   = procsnz[i];
3525       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3526       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3527     }
3528     /* read in the stuff for the last proc */
3529     if (size != 1) {
3530       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3531       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3532       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3533       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3534     }
3535     ierr = PetscFree(cols);CHKERRQ(ierr);
3536   } else {
3537     /* determine buffer space needed for message */
3538     nz = 0;
3539     for (i=0; i<m; i++) {
3540       nz += locrowlens[i];
3541     }
3542     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3543     mycols = ibuf;
3544     /* receive message of column indices*/
3545     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3546     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3547     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3548   }
3549 
3550   /* loop over local rows, determining number of off diagonal entries */
3551   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3552   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3553   rowcount = 0; nzcount = 0;
3554   for (i=0; i<mbs; i++) {
3555     dcount  = 0;
3556     odcount = 0;
3557     for (j=0; j<bs; j++) {
3558       kmax = locrowlens[rowcount];
3559       for (k=0; k<kmax; k++) {
3560         tmp = mycols[nzcount++]/bs;
3561         if (!mask[tmp]) {
3562           mask[tmp] = 1;
3563           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3564           else masked1[dcount++] = tmp;
3565         }
3566       }
3567       rowcount++;
3568     }
3569 
3570     dlens[i]  = dcount;
3571     odlens[i] = odcount;
3572 
3573     /* zero out the mask elements we set */
3574     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3575     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3576   }
3577 
3578   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3579   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3580 
3581   if (!rank) {
3582     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3583     /* read in my part of the matrix numerical values  */
3584     nz     = procsnz[0];
3585     vals   = buf;
3586     mycols = ibuf;
3587     if (size == 1) nz -= extra_rows;
3588     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3589     if (size == 1) {
3590       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3591     }
3592 
3593     /* insert into matrix */
3594     jj = rstart*bs;
3595     for (i=0; i<m; i++) {
3596       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3597       mycols += locrowlens[i];
3598       vals   += locrowlens[i];
3599       jj++;
3600     }
3601     /* read in other processors (except the last one) and ship out */
3602     for (i=1; i<size-1; i++) {
3603       nz   = procsnz[i];
3604       vals = buf;
3605       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3606       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3607     }
3608     /* the last proc */
3609     if (size != 1) {
3610       nz   = procsnz[i] - extra_rows;
3611       vals = buf;
3612       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3613       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3614       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3615     }
3616     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3617   } else {
3618     /* receive numeric values */
3619     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3620 
3621     /* receive message of values*/
3622     vals   = buf;
3623     mycols = ibuf;
3624     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3625 
3626     /* insert into matrix */
3627     jj = rstart*bs;
3628     for (i=0; i<m; i++) {
3629       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3630       mycols += locrowlens[i];
3631       vals   += locrowlens[i];
3632       jj++;
3633     }
3634   }
3635   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3636   ierr = PetscFree(buf);CHKERRQ(ierr);
3637   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3638   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3639   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3640   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3641   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3642   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3643   PetscFunctionReturn(0);
3644 }
3645 
3646 /*@
3647    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3648 
3649    Input Parameters:
3650 +  mat  - the matrix
3651 -  fact - factor
3652 
3653    Not Collective, each process can use a different factor
3654 
3655    Level: advanced
3656 
3657   Notes:
3658    This can also be set by the command line option: -mat_use_hash_table <fact>
3659 
3660 .seealso: MatSetOption()
3661 @*/
3662 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3663 {
3664   PetscErrorCode ierr;
3665 
3666   PetscFunctionBegin;
3667   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3668   PetscFunctionReturn(0);
3669 }
3670 
3671 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3672 {
3673   Mat_MPIBAIJ *baij;
3674 
3675   PetscFunctionBegin;
3676   baij          = (Mat_MPIBAIJ*)mat->data;
3677   baij->ht_fact = fact;
3678   PetscFunctionReturn(0);
3679 }
3680 
3681 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3682 {
3683   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3684   PetscBool      flg;
3685   PetscErrorCode ierr;
3686 
3687   PetscFunctionBegin;
3688   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3689   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3690   if (Ad)     *Ad     = a->A;
3691   if (Ao)     *Ao     = a->B;
3692   if (colmap) *colmap = a->garray;
3693   PetscFunctionReturn(0);
3694 }
3695 
3696 /*
3697     Special version for direct calls from Fortran (to eliminate two function call overheads
3698 */
3699 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3700 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3701 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3702 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3703 #endif
3704 
3705 /*@C
3706   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3707 
3708   Collective on Mat
3709 
3710   Input Parameters:
3711 + mat - the matrix
3712 . min - number of input rows
3713 . im - input rows
3714 . nin - number of input columns
3715 . in - input columns
3716 . v - numerical values input
3717 - addvin - INSERT_VALUES or ADD_VALUES
3718 
3719   Notes:
3720     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3721 
3722   Level: advanced
3723 
3724 .seealso:   MatSetValuesBlocked()
3725 @*/
3726 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3727 {
3728   /* convert input arguments to C version */
3729   Mat        mat  = *matin;
3730   PetscInt   m    = *min, n = *nin;
3731   InsertMode addv = *addvin;
3732 
3733   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3734   const MatScalar *value;
3735   MatScalar       *barray     = baij->barray;
3736   PetscBool       roworiented = baij->roworiented;
3737   PetscErrorCode  ierr;
3738   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3739   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3740   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3741 
3742   PetscFunctionBegin;
3743   /* tasks normally handled by MatSetValuesBlocked() */
3744   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3745 #if defined(PETSC_USE_DEBUG)
3746   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3747   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3748 #endif
3749   if (mat->assembled) {
3750     mat->was_assembled = PETSC_TRUE;
3751     mat->assembled     = PETSC_FALSE;
3752   }
3753   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3754 
3755 
3756   if (!barray) {
3757     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3758     baij->barray = barray;
3759   }
3760 
3761   if (roworiented) stepval = (n-1)*bs;
3762   else stepval = (m-1)*bs;
3763 
3764   for (i=0; i<m; i++) {
3765     if (im[i] < 0) continue;
3766 #if defined(PETSC_USE_DEBUG)
3767     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3768 #endif
3769     if (im[i] >= rstart && im[i] < rend) {
3770       row = im[i] - rstart;
3771       for (j=0; j<n; j++) {
3772         /* If NumCol = 1 then a copy is not required */
3773         if ((roworiented) && (n == 1)) {
3774           barray = (MatScalar*)v + i*bs2;
3775         } else if ((!roworiented) && (m == 1)) {
3776           barray = (MatScalar*)v + j*bs2;
3777         } else { /* Here a copy is required */
3778           if (roworiented) {
3779             value = v + i*(stepval+bs)*bs + j*bs;
3780           } else {
3781             value = v + j*(stepval+bs)*bs + i*bs;
3782           }
3783           for (ii=0; ii<bs; ii++,value+=stepval) {
3784             for (jj=0; jj<bs; jj++) {
3785               *barray++ = *value++;
3786             }
3787           }
3788           barray -=bs2;
3789         }
3790 
3791         if (in[j] >= cstart && in[j] < cend) {
3792           col  = in[j] - cstart;
3793           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3794         } else if (in[j] < 0) continue;
3795 #if defined(PETSC_USE_DEBUG)
3796         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3797 #endif
3798         else {
3799           if (mat->was_assembled) {
3800             if (!baij->colmap) {
3801               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3802             }
3803 
3804 #if defined(PETSC_USE_DEBUG)
3805 #if defined(PETSC_USE_CTABLE)
3806             { PetscInt data;
3807               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3808               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3809             }
3810 #else
3811             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3812 #endif
3813 #endif
3814 #if defined(PETSC_USE_CTABLE)
3815             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3816             col  = (col - 1)/bs;
3817 #else
3818             col = (baij->colmap[in[j]] - 1)/bs;
3819 #endif
3820             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3821               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3822               col  =  in[j];
3823             }
3824           } else col = in[j];
3825           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3826         }
3827       }
3828     } else {
3829       if (!baij->donotstash) {
3830         if (roworiented) {
3831           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3832         } else {
3833           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3834         }
3835       }
3836     }
3837   }
3838 
3839   /* task normally handled by MatSetValuesBlocked() */
3840   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3841   PetscFunctionReturn(0);
3842 }
3843 
3844 /*@
3845      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3846          CSR format the local rows.
3847 
3848    Collective
3849 
3850    Input Parameters:
3851 +  comm - MPI communicator
3852 .  bs - the block size, only a block size of 1 is supported
3853 .  m - number of local rows (Cannot be PETSC_DECIDE)
3854 .  n - This value should be the same as the local size used in creating the
3855        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3856        calculated if N is given) For square matrices n is almost always m.
3857 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3858 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3859 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3860 .   j - column indices
3861 -   a - matrix values
3862 
3863    Output Parameter:
3864 .   mat - the matrix
3865 
3866    Level: intermediate
3867 
3868    Notes:
3869        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3870      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3871      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3872 
3873      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3874      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3875      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3876      with column-major ordering within blocks.
3877 
3878        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3879 
3880 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3881           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3882 @*/
3883 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3884 {
3885   PetscErrorCode ierr;
3886 
3887   PetscFunctionBegin;
3888   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3889   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3890   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3891   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3892   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3893   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3894   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3895   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3896   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3897   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3898   PetscFunctionReturn(0);
3899 }
3900 
3901 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3902 {
3903   PetscErrorCode ierr;
3904   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3905   PetscInt       *indx;
3906   PetscScalar    *values;
3907 
3908   PetscFunctionBegin;
3909   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3910   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3911     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3912     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3913     PetscInt       *bindx,rmax=a->rmax,j;
3914     PetscMPIInt    rank,size;
3915 
3916     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3917     mbs = m/bs; Nbs = N/cbs;
3918     if (n == PETSC_DECIDE) {
3919       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3920     }
3921     nbs = n/cbs;
3922 
3923     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3924     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3925 
3926     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3927     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3928     if (rank == size-1) {
3929       /* Check sum(nbs) = Nbs */
3930       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3931     }
3932 
3933     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3934     for (i=0; i<mbs; i++) {
3935       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3936       nnz = nnz/bs;
3937       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3938       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3939       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3940     }
3941     ierr = PetscFree(bindx);CHKERRQ(ierr);
3942 
3943     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3944     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3945     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3946     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3947     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3948     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3949     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3950   }
3951 
3952   /* numeric phase */
3953   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3954   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3955 
3956   for (i=0; i<m; i++) {
3957     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3958     Ii   = i + rstart;
3959     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3960     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3961   }
3962   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3963   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3964   PetscFunctionReturn(0);
3965 }
3966