xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode    ierr;
15   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
16   PetscScalar       *va,*vv;
17   Vec               vB,vA;
18   const PetscScalar *vb;
19 
20   PetscFunctionBegin;
21   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
22   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
23 
24   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
25   if (idx) {
26     for (i=0; i<m; i++) {
27       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
28     }
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
32   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
33   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
34 
35   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
36   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
37   for (i=0; i<m; i++) {
38     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
39       vv[i] = vb[i];
40       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
41     } else {
42       vv[i] = va[i];
43       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
44         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
45     }
46   }
47   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
48   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
49   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
50   ierr = PetscFree(idxb);CHKERRQ(ierr);
51   ierr = VecDestroy(&vA);CHKERRQ(ierr);
52   ierr = VecDestroy(&vB);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
57 {
58   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
63   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79      Local utility routine that creates a mapping from the global column
80    number to the local number in the off-diagonal part of the local
81    storage of the matrix.  This is done in a non scalable way since the
82    length of colmap equals the global matrix length.
83 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* Sparse table: maps global block column + 1 -> point offset in B + 1
     (the +1 shifts avoid storing 0, which the table treats as "absent") */
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* Dense array indexed by global block column; entry is point offset + 1,
     so 0 (from calloc) means "column not present in B" */
  ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
104 
/*
   MatSetValues_SeqBAIJ_A_Private - macro that inserts/adds one point-indexed
   value (row,col local to this process) into the diagonal block A.

   Finds the target block column by binary + linear search over the block row;
   if absent (and a->nonew permits) reallocates and shifts later entries up to
   make room.  Relies on locals declared by the caller: brow,bcol,ridx,cidx,
   rp,ap,bap,rmax,nrow,low,high,t,_i,N and the A-block arrays aa,ai,aj,aimax,ailen.
   orow/ocol are the original global indices, used only for error messages.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
140 
/*
   MatSetValues_SeqBAIJ_B_Private - macro that inserts/adds one point-indexed
   value into the off-diagonal block B; mirror image of the A variant above
   but operating on the B-block arrays ba,bi,bj,bimax,bilen and b->nonew.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
176 
/*
   MatSetValues_MPIBAIJ - point-indexed MatSetValues for the parallel BAIJ format.

   Locally owned rows are routed to the diagonal block A (columns in the local
   range) or the off-diagonal block B (other columns, remapped via colmap);
   off-process rows are stashed for communication at assembly time.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  /* Scratch locals used (and clobbered) by the SetValues macros above */
  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative row indices are silently skipped */
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikely(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else { /* off-diagonal block */
          if (mat->was_assembled) {
            /* After an assembly, B stores compacted local columns; translate the
               global column through colmap (built lazily) */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* Column unknown to B: disassemble back to global indexing so it can grow */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs; /* colmap gives the block offset; add intra-block offset */
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
259 
260 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
261 {
262   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
263   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
264   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
265   PetscErrorCode    ierr;
266   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
267   PetscBool         roworiented=a->roworiented;
268   const PetscScalar *value     = v;
269   MatScalar         *ap,*aa = a->a,*bap;
270 
271   PetscFunctionBegin;
272   rp   = aj + ai[row];
273   ap   = aa + bs2*ai[row];
274   rmax = imax[row];
275   nrow = ailen[row];
276   value = v;
277   low = 0;
278   high = nrow;
279   while (high-low > 7) {
280     t = (low+high)/2;
281     if (rp[t] > col) high = t;
282     else             low  = t;
283   }
284   for (i=low; i<high; i++) {
285     if (rp[i] > col) break;
286     if (rp[i] == col) {
287       bap = ap +  bs2*i;
288       if (roworiented) {
289         if (is == ADD_VALUES) {
290           for (ii=0; ii<bs; ii++) {
291             for (jj=ii; jj<bs2; jj+=bs) {
292               bap[jj] += *value++;
293             }
294           }
295         } else {
296           for (ii=0; ii<bs; ii++) {
297             for (jj=ii; jj<bs2; jj+=bs) {
298               bap[jj] = *value++;
299             }
300           }
301         }
302       } else {
303         if (is == ADD_VALUES) {
304           for (ii=0; ii<bs; ii++,value+=bs) {
305             for (jj=0; jj<bs; jj++) {
306               bap[jj] += value[jj];
307             }
308             bap += bs;
309           }
310         } else {
311           for (ii=0; ii<bs; ii++,value+=bs) {
312             for (jj=0; jj<bs; jj++) {
313               bap[jj]  = value[jj];
314             }
315             bap += bs;
316           }
317         }
318       }
319       goto noinsert2;
320     }
321   }
322   if (nonew == 1) goto noinsert2;
323   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
324   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
325   N = nrow++ - 1; high++;
326   /* shift up all the later entries in this row */
327   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
328   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
329   rp[i] = col;
330   bap   = ap +  bs2*i;
331   if (roworiented) {
332     for (ii=0; ii<bs; ii++) {
333       for (jj=ii; jj<bs2; jj+=bs) {
334         bap[jj] = *value++;
335       }
336     }
337   } else {
338     for (ii=0; ii<bs; ii++) {
339       for (jj=0; jj<bs; jj++) {
340         *bap++ = *value++;
341       }
342     }
343   }
344   noinsert2:;
345   ailen[row] = nrow;
346   PetscFunctionReturn(0);
347 }
348 
349 /*
350     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
351     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
352 */
/*
   MatSetValuesBlocked_MPIBAIJ - block-indexed MatSetValues for parallel BAIJ.

   Locally owned block rows go into the diagonal block A or (via colmap) the
   off-diagonal block B; off-process block rows are stashed for assembly.
   When the input layout is not already one contiguous block, each bs x bs
   block is first copied into the scratch buffer baij->barray.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the one-block scratch buffer */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2; /* block is already contiguous in v */
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
        else { /* off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs; /* point offset -> local block column */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* unknown column: disassemble so B can accept it with global indexing */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else { /* off-process block row: stash for assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
445 
446 #define HASH_KEY 0.6180339887
447 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
448 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
449 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
450 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
451 {
452   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
453   PetscBool      roworiented = baij->roworiented;
454   PetscErrorCode ierr;
455   PetscInt       i,j,row,col;
456   PetscInt       rstart_orig=mat->rmap->rstart;
457   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
458   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
459   PetscReal      tmp;
460   MatScalar      **HD = baij->hd,value;
461   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
462 
463   PetscFunctionBegin;
464   for (i=0; i<m; i++) {
465     if (PetscDefined(USE_DEBUG)) {
466       if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
467       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
468     }
469     row = im[i];
470     if (row >= rstart_orig && row < rend_orig) {
471       for (j=0; j<n; j++) {
472         col = in[j];
473         if (roworiented) value = v[i*n+j];
474         else             value = v[i+j*m];
475         /* Look up PetscInto the Hash Table */
476         key = (row/bs)*Nbs+(col/bs)+1;
477         h1  = HASH(size,key,tmp);
478 
479         idx = h1;
480         if (PetscDefined(USE_DEBUG)) {
481           insert_ct++;
482           total_ct++;
483           if (HT[idx] != key) {
484             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
485             if (idx == size) {
486               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
487               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
488             }
489           }
490         } else if (HT[idx] != key) {
491           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
492           if (idx == size) {
493             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
494             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
495           }
496         }
497         /* A HASH table entry is found, so insert the values at the correct address */
498         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
499         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
500       }
501     } else if (!baij->donotstash) {
502       if (roworiented) {
503         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
504       } else {
505         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
506       }
507     }
508   }
509   if (PetscDefined(USE_DEBUG)) {
510     baij->ht_total_ct  += total_ct;
511     baij->ht_insert_ct += insert_ct;
512   }
513   PetscFunctionReturn(0);
514 }
515 
516 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
517 {
518   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
519   PetscBool         roworiented = baij->roworiented;
520   PetscErrorCode    ierr;
521   PetscInt          i,j,ii,jj,row,col;
522   PetscInt          rstart=baij->rstartbs;
523   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
524   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
525   PetscReal         tmp;
526   MatScalar         **HD = baij->hd,*baij_a;
527   const PetscScalar *v_t,*value;
528   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
529 
530   PetscFunctionBegin;
531   if (roworiented) stepval = (n-1)*bs;
532   else stepval = (m-1)*bs;
533 
534   for (i=0; i<m; i++) {
535     if (PetscDefined(USE_DEBUG)) {
536       if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
537       if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
538     }
539     row = im[i];
540     v_t = v + i*nbs2;
541     if (row >= rstart && row < rend) {
542       for (j=0; j<n; j++) {
543         col = in[j];
544 
545         /* Look up into the Hash Table */
546         key = row*Nbs+col+1;
547         h1  = HASH(size,key,tmp);
548 
549         idx = h1;
550         if (PetscDefined(USE_DEBUG)) {
551           total_ct++;
552           insert_ct++;
553           if (HT[idx] != key) {
554             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
555             if (idx == size) {
556               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
557               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
558             }
559           }
560         } else if (HT[idx] != key) {
561           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
562           if (idx == size) {
563             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
564             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
565           }
566         }
567         baij_a = HD[idx];
568         if (roworiented) {
569           /*value = v + i*(stepval+bs)*bs + j*bs;*/
570           /* value = v + (i*(stepval+bs)+j)*bs; */
571           value = v_t;
572           v_t  += bs;
573           if (addv == ADD_VALUES) {
574             for (ii=0; ii<bs; ii++,value+=stepval) {
575               for (jj=ii; jj<bs2; jj+=bs) {
576                 baij_a[jj] += *value++;
577               }
578             }
579           } else {
580             for (ii=0; ii<bs; ii++,value+=stepval) {
581               for (jj=ii; jj<bs2; jj+=bs) {
582                 baij_a[jj] = *value++;
583               }
584             }
585           }
586         } else {
587           value = v + j*(stepval+bs)*bs + i*bs;
588           if (addv == ADD_VALUES) {
589             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
590               for (jj=0; jj<bs; jj++) {
591                 baij_a[jj] += *value++;
592               }
593             }
594           } else {
595             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
596               for (jj=0; jj<bs; jj++) {
597                 baij_a[jj] = *value++;
598               }
599             }
600           }
601         }
602       }
603     } else {
604       if (!baij->donotstash) {
605         if (roworiented) {
606           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
607         } else {
608           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
609         }
610       }
611     }
612   }
613   if (PetscDefined(USE_DEBUG)) {
614     baij->ht_total_ct  += total_ct;
615     baij->ht_insert_ct += insert_ct;
616   }
617   PetscFunctionReturn(0);
618 }
619 
/*
   MatGetValues_MPIBAIJ - retrieves point-indexed values; only locally owned
   rows are supported.  Columns outside the local diagonal range are looked up
   in the off-diagonal block through colmap; absent entries read as 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate global block column via colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--; /* undo the +1 stored in the table; absent keys become -1 */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* verify the mapped block really is this column (guards stale/absent entries) */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* block offset + intra-block offset */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
660 
/*
   MatNorm_MPIBAIJ - computes Frobenius, 1- (max column sum), or infinity-
   (max row sum) norm of a parallel BAIJ matrix, reducing across ranks.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single rank: defer entirely to the sequential diagonal block */
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, then global sum and sqrt */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart is taken from rstartbs (row block start); the
         diagonal block's columns start at cstartbs — these coincide only when
         row and column layouts match (square case). Confirm for non-square. */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j; /* garray maps local to global block column */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      /* global column sums, then take the maximum */
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      /* accumulate the bs point-row sums of each block row over A and B */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      /* rows are not shared between ranks, so a global max suffices */
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
753 
754 /*
755   Creates the hash table, and sets the table
756   This table is created only once.
757   If new entried need to be added to the matrix
758   then the hash table has to be destroyed and
759   recreated.
760 */
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0; /* ct: total collisions seen; max: longest probe sequence (diagnostics only) */
#endif

  PetscFunctionBegin;
  /* the table is built once; later calls are no-ops until it is destroyed */
  if (baij->ht) PetscFunctionReturn(0);

  /* table size is 'factor' times the local number of nonzero blocks;
     NOTE(review): nz == 0 would give ht_size == 0 and a modulo-by-zero in the
     probing below -- presumably callers only build the table for nonempty matrices */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd; /* HD[slot] points at the first scalar of the stored bs x bs block */
  HT   = baij->ht; /* HT[slot] is the key; 0 marks an empty slot (hence the +1 in key) */

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      /* key encodes the global (block-row, block-column) pair; +1 keeps 0 free as the empty marker */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* linear probing starting from the hashed slot */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      /* off-diagonal columns are stored compressed; garray maps back to global block columns */
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
845 
846 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
847 {
848   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
849   PetscErrorCode ierr;
850   PetscInt       nstash,reallocs;
851 
852   PetscFunctionBegin;
853   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
854 
855   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
856   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
857   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
858   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
859   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
860   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point-wise stash: each message carries rows/cols/vals destined for this rank */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* note the bs2 stride: each stashed entry is a full bs x bs block */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the scatter context and local work vector used by MatMult */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* optionally switch MatSetValues to the hash-table fast path now that the pattern is fixed */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space handed out by MatGetRow is invalidated by assembly */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
977 
978 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
979 #include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    /* INFO formats print per-rank statistics and return without gathering the matrix */
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (rank == 0) {
      /* rank 0 owns every row of the gathered copy */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals lists the bs consecutive global scalar rows of block-row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one scalar column of the bs x bs block at a time */
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps compressed off-diagonal block columns back to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (rank == 0) {
      /* the whole gathered matrix lives in rank 0's (sequential) diagonal block */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1097 
/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz); /* local nonzero count in scalar (not block) terms */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* rank 0 receives the global nonzero count into header[3]; other ranks keep the local value, unused */
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices; within each scalar row the order is:
     B entries left of the diagonal block, the A (diagonal-block) entries,
     then the remaining B entries, so indices come out globally ascending */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs;
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values, in exactly the same traversal order as the
     column indices above; blocks are stored column-major, hence the bs*(bs*jb+l)+k indexing */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1180 
1181 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1182 {
1183   PetscErrorCode ierr;
1184   PetscBool      iascii,isdraw,issocket,isbinary;
1185 
1186   PetscFunctionBegin;
1187   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1188   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1189   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1190   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1191   if (iascii || isdraw || issocket) {
1192     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1193   } else if (isbinary) {
1194     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1195   }
1196   PetscFunctionReturn(0);
1197 }
1198 
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* free communication stashes, the two sequential sub-matrices, and all cached work space */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* clear the type name and unregister every method composed onto this object at creation */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1241 
1242 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1243 {
1244   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1245   PetscErrorCode ierr;
1246   PetscInt       nt;
1247 
1248   PetscFunctionBegin;
1249   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1250   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1251   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1252   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1253   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1255   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1261 {
1262   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1268   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1274 {
1275   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279   /* do nondiagonal part */
1280   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1281   /* do local part */
1282   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1283   /* add partial results together */
1284   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1285   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1290 {
1291   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1292   PetscErrorCode ierr;
1293 
1294   PetscFunctionBegin;
1295   /* do nondiagonal part */
1296   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1297   /* do local part */
1298   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1299   /* add partial results together */
1300   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1301   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 /*
1306   This only works correctly for square matrices where the subblock A->A is the
1307    diagonal block
1308 */
1309 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1310 {
1311   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1316   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1321 {
1322   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1327   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  /* only one row may be "gotten" at a time; MatRestoreRow clears the flag */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart; /* local scalar row index */

  /* request only the pieces (values/indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap maps compressed off-diagonal block columns to global block columns */
  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column precedes the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i; /* split point in the B row: entries before/after the diagonal block */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already computed in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1407 
1408 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1409 {
1410   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1411 
1412   PetscFunctionBegin;
1413   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1414   baij->getrowactive = PETSC_FALSE;
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1419 {
1420   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1421   PetscErrorCode ierr;
1422 
1423   PetscFunctionBegin;
1424   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1425   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1430 {
1431   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1432   Mat            A  = a->A,B = a->B;
1433   PetscErrorCode ierr;
1434   PetscLogDouble isend[5],irecv[5];
1435 
1436   PetscFunctionBegin;
1437   info->block_size = (PetscReal)matin->rmap->bs;
1438 
1439   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1440 
1441   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1442   isend[3] = info->memory;  isend[4] = info->mallocs;
1443 
1444   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1445 
1446   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1447   isend[3] += info->memory;  isend[4] += info->mallocs;
1448 
1449   if (flag == MAT_LOCAL) {
1450     info->nz_used      = isend[0];
1451     info->nz_allocated = isend[1];
1452     info->nz_unneeded  = isend[2];
1453     info->memory       = isend[3];
1454     info->mallocs      = isend[4];
1455   } else if (flag == MAT_GLOBAL_MAX) {
1456     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1457 
1458     info->nz_used      = irecv[0];
1459     info->nz_allocated = irecv[1];
1460     info->nz_unneeded  = irecv[2];
1461     info->memory       = irecv[3];
1462     info->mallocs      = irecv[4];
1463   } else if (flag == MAT_GLOBAL_SUM) {
1464     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1465 
1466     info->nz_used      = irecv[0];
1467     info->nz_allocated = irecv[1];
1468     info->nz_unneeded  = irecv[2];
1469     info->memory       = irecv[3];
1470     info->mallocs      = irecv[4];
1471   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1472   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1473   info->fill_ratio_needed = 0;
1474   info->factor_mallocs    = 0;
1475   PetscFunctionReturn(0);
1476 }
1477 
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* pattern-related options are forwarded to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* cache the orientation locally too: MatSetValues_MPIBAIJ consults it directly */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default hash-table size factor used by MatCreateHashTable_MPIBAIJ_Private */
    break;
  /* symmetry-type options only affect the diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1525 
/*
   MatTranspose_MPIBAIJ - Forms the transpose of a parallel BAIJ matrix by walking
   the blocks of the local diagonal (A) and off-diagonal (B) parts and inserting
   each block column of the original as a row of the transpose via
   MatSetValues_MPIBAIJ(), then assembling the result.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    /* the transpose has the row and column layouts of A swapped */
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  /* rvals holds the bs global point-row indices covered by one block row of A */
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* global point column of the first column of block aj[j] */
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert one column of the bs x bs block as one row of the transpose;
           the bs consecutive entries of 'a' consumed here form that block column */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* off-diagonal block columns are compressed; garray maps them to global block columns */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* for MAT_INPLACE_MATRIX, replace A's guts with the freshly built transpose */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1589 
1590 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1591 {
1592   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1593   Mat            a     = baij->A,b = baij->B;
1594   PetscErrorCode ierr;
1595   PetscInt       s1,s2,s3;
1596 
1597   PetscFunctionBegin;
1598   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1599   if (rr) {
1600     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1601     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1602     /* Overlap communication with computation. */
1603     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604   }
1605   if (ll) {
1606     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1607     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1608     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1609   }
1610   /* scale  the diagonal block */
1611   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1612 
1613   if (rr) {
1614     /* Do a scatter end and then right scale the off-diagonal block */
1615     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1616     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1617   }
1618   PetscFunctionReturn(0);
1619 }
1620 
/*
   MatZeroRows_MPIBAIJ - Zeros the locally owned rows among the N global rows
   listed in rows[], optionally placing 'diag' on the diagonal of each zeroed row.
   If both x and b are given, the right hand side is adjusted so that x remains a
   solution for the zeroed rows: b_i = diag * x_i.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows: translate the global row list into local row indices
     (lrows is allocated by the helper and freed below) */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    /* congruent row/column layouts: the sequential routine can place diag directly */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert the diagonal entries with
       MatSetValues(), which requires the matrix to allow new nonzero locations */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* flush the inserted diagonal entries (collective) */
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
1679 
/*
   MatZeroRowsColumns_MPIBAIJ - Zeros the listed global rows AND the matching
   columns, optionally placing 'diag' on the diagonal.  The diagonal block is
   handled by MatZeroRowsColumns() on l->A; the off-diagonal block is modified in
   place by direct manipulation of its BAIJ storage.  Communication is needed both
   to route the (possibly off-process) row list to the owners and to flag which
   ghost columns must be zeroed.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed: after the reduce, lrows[r] >= 0 exactly
     for the locally owned rows some process asked to zero */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers (lrows is reused in place; len <= n) */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix: build a mask of the ghost columns that
     correspond to zeroed rows on some process */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost values of x are needed to update b for the zeroed columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix: walk the entries of point row
     'row' inside each bs x bs block (stride bs within a block) and zero them */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        /* col is in the compressed (local ghost) point numbering of l->B */
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          /* keep b consistent: subtract the contribution of the zeroed column */
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
1775 
1776 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1777 {
1778   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1779   PetscErrorCode ierr;
1780 
1781   PetscFunctionBegin;
1782   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1787 
1788 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1789 {
1790   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1791   Mat            a,b,c,d;
1792   PetscBool      flg;
1793   PetscErrorCode ierr;
1794 
1795   PetscFunctionBegin;
1796   a = matA->A; b = matA->B;
1797   c = matB->A; d = matB->B;
1798 
1799   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1800   if (flg) {
1801     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1802   }
1803   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1808 {
1809   PetscErrorCode ierr;
1810   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1811   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1812 
1813   PetscFunctionBegin;
1814   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1815   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1816     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1817   } else {
1818     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1819     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1820   }
1821   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1826 {
1827   PetscErrorCode ierr;
1828 
1829   PetscFunctionBegin;
1830   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1835 {
1836   PetscErrorCode ierr;
1837   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1838   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1839   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1840 
1841   PetscFunctionBegin;
1842   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
/*
   MatAXPY_MPIBAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN: a flat BLAS axpy on the value arrays of the diagonal and
   off-diagonal blocks (their storage layouts coincide).
   SUBSET_NONZERO_PATTERN: fall back to the generic entry-wise routine.
   Otherwise: build a new matrix with preallocation covering the merged pattern,
   compute the sum there, and replace Y's contents.
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    /* nz counts blocks, so the scalar count is nz*bs2 */
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    /* B gets Y's name, sizes and block sizes so it can transparently replace Y */
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    /* preallocate for the union of the two nonzero patterns */
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* move B's guts into Y, destroying Y's old data */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1889 
1890 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1891 {
1892 #if defined(PETSC_USE_COMPLEX)
1893   PetscErrorCode ierr;
1894   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1895 
1896   PetscFunctionBegin;
1897   ierr = MatConjugate_SeqBAIJ(a->A);CHKERRQ(ierr);
1898   ierr = MatConjugate_SeqBAIJ(a->B);CHKERRQ(ierr);
1899 #else
1900   PetscFunctionBegin;
1901 #endif
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1906 {
1907   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1912   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1917 {
1918   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1919   PetscErrorCode ierr;
1920 
1921   PetscFunctionBegin;
1922   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1923   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1928 {
1929   PetscErrorCode ierr;
1930   IS             iscol_local;
1931   PetscInt       csize;
1932 
1933   PetscFunctionBegin;
1934   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1935   if (call == MAT_REUSE_MATRIX) {
1936     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1937     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1938   } else {
1939     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1940   }
1941   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1942   if (call == MAT_INITIAL_MATRIX) {
1943     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1944     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
/*
  Not great since it makes two copies of the submatrix: first a SeqBAIJ on each
  process, and then the end result obtained by concatenating those local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
/*
   MatCreateSubMatrix_MPIBAIJ_Private - Extracts the submatrix given by the
   (point-indexed) isrow/iscol, first as a sequential BAIJ on each process and
   then re-distributed as a parallel matrix with csize local columns.
   csize == PETSC_DECIDE lets this routine choose the local column size.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert point indices to block indices for the sequential extraction */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix stashed on *newmat during the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* work in block units from here on */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    /* prefix-sum of the chosen local column sizes gives this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    /* only the call matching M's actual type takes effect (routine serves BAIJ and SBAIJ) */
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* values of a block row are inserted column-of-blocks-wise below */
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2079 
2080 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2081 {
2082   MPI_Comm       comm,pcomm;
2083   PetscInt       clocal_size,nrows;
2084   const PetscInt *rows;
2085   PetscMPIInt    size;
2086   IS             crowp,lcolp;
2087   PetscErrorCode ierr;
2088 
2089   PetscFunctionBegin;
2090   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2091   /* make a collective version of 'rowp' */
2092   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2093   if (pcomm==comm) {
2094     crowp = rowp;
2095   } else {
2096     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2097     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2098     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2099     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2100   }
2101   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2102   /* make a local version of 'colp' */
2103   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2104   ierr = MPI_Comm_size(pcomm,&size);CHKERRMPI(ierr);
2105   if (size==1) {
2106     lcolp = colp;
2107   } else {
2108     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2109   }
2110   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2111   /* now we just get the submatrix */
2112   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2113   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2114   /* clean up */
2115   if (pcomm!=comm) {
2116     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2117   }
2118   if (size>1) {
2119     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2125 {
2126   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2127   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2128 
2129   PetscFunctionBegin;
2130   if (nghosts) *nghosts = B->nbs;
2131   if (ghosts) *ghosts = baij->garray;
2132   PetscFunctionReturn(0);
2133 }
2134 
/*
   MatGetSeqNonzeroStructure_MPIBAIJ - Builds on every process a sequential AIJ
   matrix holding the GLOBAL block nonzero structure of A (one point entry per
   bs x bs block).  Only the structure is assembled; numerical values are never
   set.  Uses two MPI_Allgatherv calls: one for the row lengths, one for the
   column indices.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens is indexed by global block row; only the locally owned slice is filled
     here, the Allgatherv below completes it */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  /* single allocation: first half holds recvcounts, second half displs */
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  /* write directly into B's column array at the slot for this rank's rows */
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  /* recvcounts/displs are recomputed: now they count nonzeros per rank, not rows */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information from A to the structure matrix */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2253 
2254 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2255 {
2256   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2257   PetscErrorCode ierr;
2258   Vec            bb1 = NULL;
2259 
2260   PetscFunctionBegin;
2261   if (flag == SOR_APPLY_UPPER) {
2262     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2263     PetscFunctionReturn(0);
2264   }
2265 
2266   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2267     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2268   }
2269 
2270   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2271     if (flag & SOR_ZERO_INITIAL_GUESS) {
2272       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2273       its--;
2274     }
2275 
2276     while (its--) {
2277       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2278       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2279 
2280       /* update rhs: bb1 = bb - B*x */
2281       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2282       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2283 
2284       /* local sweep */
2285       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2286     }
2287   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2288     if (flag & SOR_ZERO_INITIAL_GUESS) {
2289       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2290       its--;
2291     }
2292     while (its--) {
2293       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2294       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2295 
2296       /* update rhs: bb1 = bb - B*x */
2297       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2298       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2299 
2300       /* local sweep */
2301       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2302     }
2303   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2304     if (flag & SOR_ZERO_INITIAL_GUESS) {
2305       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2306       its--;
2307     }
2308     while (its--) {
2309       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2310       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2311 
2312       /* update rhs: bb1 = bb - B*x */
2313       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2314       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2315 
2316       /* local sweep */
2317       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2318     }
2319   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2320 
2321   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions)
2326 {
2327   PetscErrorCode ierr;
2328   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2329   PetscInt       m,N,i,*garray = aij->garray;
2330   PetscInt       ib,jb,bs = A->rmap->bs;
2331   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2332   MatScalar      *a_val = a_aij->a;
2333   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2334   MatScalar      *b_val = b_aij->a;
2335   PetscReal      *work;
2336 
2337   PetscFunctionBegin;
2338   ierr = MatGetSize(A,&m,&N);CHKERRQ(ierr);
2339   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2340   if (type == NORM_2) {
2341     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2342       for (jb=0; jb<bs; jb++) {
2343         for (ib=0; ib<bs; ib++) {
2344           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2345           a_val++;
2346         }
2347       }
2348     }
2349     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2350       for (jb=0; jb<bs; jb++) {
2351         for (ib=0; ib<bs; ib++) {
2352           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2353           b_val++;
2354         }
2355       }
2356     }
2357   } else if (type == NORM_1) {
2358     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2359       for (jb=0; jb<bs; jb++) {
2360         for (ib=0; ib<bs; ib++) {
2361           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2362           a_val++;
2363         }
2364       }
2365     }
2366     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2367       for (jb=0; jb<bs; jb++) {
2368        for (ib=0; ib<bs; ib++) {
2369           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2370           b_val++;
2371         }
2372       }
2373     }
2374   } else if (type == NORM_INFINITY) {
2375     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2376       for (jb=0; jb<bs; jb++) {
2377         for (ib=0; ib<bs; ib++) {
2378           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2379           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2380           a_val++;
2381         }
2382       }
2383     }
2384     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2385       for (jb=0; jb<bs; jb++) {
2386         for (ib=0; ib<bs; ib++) {
2387           int col = garray[b_aij->j[i]] * bs + jb;
2388           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2389           b_val++;
2390         }
2391       }
2392     }
2393   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2394     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2395       for (jb=0; jb<bs; jb++) {
2396         for (ib=0; ib<bs; ib++) {
2397           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2398           a_val++;
2399         }
2400       }
2401     }
2402     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2403       for (jb=0; jb<bs; jb++) {
2404        for (ib=0; ib<bs; ib++) {
2405           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2406           b_val++;
2407         }
2408       }
2409     }
2410   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2411     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2412       for (jb=0; jb<bs; jb++) {
2413         for (ib=0; ib<bs; ib++) {
2414           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2415           a_val++;
2416         }
2417       }
2418     }
2419     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2420       for (jb=0; jb<bs; jb++) {
2421        for (ib=0; ib<bs; ib++) {
2422           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2423           b_val++;
2424         }
2425       }
2426     }
2427   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
2428   if (type == NORM_INFINITY) {
2429     ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2430   } else {
2431     ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2432   }
2433   ierr = PetscFree(work);CHKERRQ(ierr);
2434   if (type == NORM_2) {
2435     for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2436   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2437     for (i=0; i<N; i++) reductions[i] /= m;
2438   }
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2443 {
2444   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2445   PetscErrorCode ierr;
2446 
2447   PetscFunctionBegin;
2448   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2449   A->factorerrortype             = a->A->factorerrortype;
2450   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2451   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2456 {
2457   PetscErrorCode ierr;
2458   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2459   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2460 
2461   PetscFunctionBegin;
2462   if (!Y->preallocated) {
2463     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2464   } else if (!aij->nz) {
2465     PetscInt nonew = aij->nonew;
2466     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2467     aij->nonew = nonew;
2468   }
2469   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2474 {
2475   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2480   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2481   if (d) {
2482     PetscInt rstart;
2483     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2484     *d += rstart/A->rmap->bs;
2485 
2486   }
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2491 {
2492   PetscFunctionBegin;
2493   *a = ((Mat_MPIBAIJ*)A->data)->A;
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIBAIJ.  The /*NN*／-style markers below give
   the slot index in struct _MatOps; a NULL entry means the corresponding
   operation is not implemented for this matrix type.  Installed into
   B->ops by MatCreate_MPIBAIJ() via PetscMemcpy. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                /*104*/NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                /*119*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                /*124*/NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
};
2644 
2645 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2646 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2647 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Preallocates and fills a parallel BAIJ
  matrix from local block-CSR data (ii/jj in block units, V the block values).

  First pass: count per-block-row diagonal vs off-diagonal blocks to compute
  exact preallocation.  Second pass: insert the values with
  MatSetValuesBlocked_MPIBAIJ (whole rows when the memory layout matches,
  block-by-block otherwise).
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
  const PetscInt *JJ    =NULL;
  PetscScalar    *values=NULL;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* local sizes and ownership range, all in block units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  /* count diagonal-block vs off-diagonal-block columns for each block row */
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* if no values were supplied, insert zeros (one row's worth suffices) */
  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* all entries are locally owned here, so skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2719 
2720 /*@C
2721    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2722 
2723    Collective
2724 
2725    Input Parameters:
2726 +  B - the matrix
2727 .  bs - the block size
2728 .  i - the indices into j for the start of each local row (starts with zero)
2729 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2730 -  v - optional values in the matrix
2731 
2732    Level: advanced
2733 
2734    Notes:
2735     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2736    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2737    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2738    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2739    block column and the second index is over columns within a block.
2740 
2741    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2742 
2743 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2744 @*/
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate arguments; bs must agree across all ranks (collective value) */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation composed on B;
     PetscTryMethod does nothing if the method is not composed */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2756 
2757 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2758 {
2759   Mat_MPIBAIJ    *b;
2760   PetscErrorCode ierr;
2761   PetscInt       i;
2762   PetscMPIInt    size;
2763 
2764   PetscFunctionBegin;
2765   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2766   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2767   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2768   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2769 
2770   if (d_nnz) {
2771     for (i=0; i<B->rmap->n/bs; i++) {
2772       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2773     }
2774   }
2775   if (o_nnz) {
2776     for (i=0; i<B->rmap->n/bs; i++) {
2777       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2778     }
2779   }
2780 
2781   b      = (Mat_MPIBAIJ*)B->data;
2782   b->bs2 = bs*bs;
2783   b->mbs = B->rmap->n/bs;
2784   b->nbs = B->cmap->n/bs;
2785   b->Mbs = B->rmap->N/bs;
2786   b->Nbs = B->cmap->N/bs;
2787 
2788   for (i=0; i<=b->size; i++) {
2789     b->rangebs[i] = B->rmap->range[i]/bs;
2790   }
2791   b->rstartbs = B->rmap->rstart/bs;
2792   b->rendbs   = B->rmap->rend/bs;
2793   b->cstartbs = B->cmap->rstart/bs;
2794   b->cendbs   = B->cmap->rend/bs;
2795 
2796 #if defined(PETSC_USE_CTABLE)
2797   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2798 #else
2799   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2800 #endif
2801   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2802   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2803   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2804 
2805   /* Because the B will have been resized we simply destroy it and create a new one each time */
2806   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2807   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2808   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2809   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2810   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2811   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2812 
2813   if (!B->preallocated) {
2814     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2815     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2816     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2817     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2818     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2819   }
2820 
2821   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2822   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2823   B->preallocated  = PETSC_TRUE;
2824   B->was_assembled = PETSC_FALSE;
2825   B->assembled     = PETSC_FALSE;
2826   PetscFunctionReturn(0);
2827 }
2828 
2829 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2830 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2831 
/*
  MatConvert_MPIBAIJ_MPIAdj - Builds an MPIAdj (adjacency) matrix from the
  block structure of an MPIBAIJ matrix.  Works entirely in block units;
  diagonal entries are dropped (the loops skip jd[k] == i), since they are
  omitted from the adjacency structure.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: compute the row-offset array ii for the merged structure */
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* second pass: merge each row in ascending global column order:
     off-diagonal columns left of the owned range, then owned (diagonal-block)
     columns, then the remaining off-diagonal columns */
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  /* ii and jj are taken over by the MPIAdj matrix (not copied) */
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2871 
2872 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2873 
2874 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2875 
2876 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2877 {
2878   PetscErrorCode ierr;
2879   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2880   Mat            B;
2881   Mat_MPIAIJ     *b;
2882 
2883   PetscFunctionBegin;
2884   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2885 
2886   if (reuse == MAT_REUSE_MATRIX) {
2887     B = *newmat;
2888   } else {
2889     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2890     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2891     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2892     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2893     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2894     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2895   }
2896   b = (Mat_MPIAIJ*) B->data;
2897 
2898   if (reuse == MAT_REUSE_MATRIX) {
2899     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
2900     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
2901   } else {
2902     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2903     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2904     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2905     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2906     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2907     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2908     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2909   }
2910   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2911   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2912 
2913   if (reuse == MAT_INPLACE_MATRIX) {
2914     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
2915   } else {
2916    *newmat = B;
2917   }
2918   PetscFunctionReturn(0);
2919 }
2920 
2921 /*MC
2922    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2923 
2924    Options Database Keys:
2925 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2926 . -mat_block_size <bs> - set the blocksize used to store the matrix
2927 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - use a hash table during matrix assembly, with hash table density factor <fact>
2929 
2930    Level: beginner
2931 
2932    Notes:
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2935 
2936 .seealso: MatCreateBAIJ
2937 M*/
2938 
2939 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2940 
/*
   MatCreate_MPIBAIJ - Type-constructor for MATMPIBAIJ: installs the function table,
   initializes the Mat_MPIBAIJ private data to an empty (unpreallocated) state,
   registers the type-specific methods, and processes the hash-table option.

   Called by MatSetType(); actual storage is allocated later by
   MatMPIBAIJSetPreallocation() or MatMPIBAIJSetPreallocationCSR().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* allocate and attach the type-private data structure */
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  /* install the MPIBAIJ virtual function table (MatOps_Values defined earlier in this file) */
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;   /* global-to-local column map; created lazily on first off-process insert */
  b->garray      = NULL;   /* global indices of off-diagonal block columns; built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register type-specific implementations dispatched via PetscTryMethod()/PetscUseMethod() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  /* -mat_use_hash_table: queried once as a flag, then again for its (optional) numeric factor */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;  /* factors <= 1 would give a table smaller than the data; fall back to the default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3021 
3022 /*MC
3023    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3024 
3025    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3026    and MATMPIBAIJ otherwise.
3027 
3028    Options Database Keys:
3029 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3030 
3031   Level: beginner
3032 
3033 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3034 M*/
3035 
3036 /*@C
3037    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3038    (block compressed row).  For good matrix assembly performance
3039    the user should preallocate the matrix storage by setting the parameters
3040    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3041    performance can be increased by more than a factor of 50.
3042 
3043    Collective on Mat
3044 
3045    Input Parameters:
3046 +  B - the matrix
3047 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3048           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3049 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3050            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
3055 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3056            submatrix (same for all local rows).
3057 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3058            off-diagonal portion of the local submatrix (possibly different for
3059            each block row) or NULL.
3060 
3061    If the *_nnz parameter is given then the *_nz parameter is ignored
3062 
3063    Options Database Keys:
3064 +   -mat_block_size - size of the blocks to use
3065 -   -mat_use_hash_table <fact>
3066 
3067    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3070 
3071    Storage Information:
3072    For a square global matrix we define each processor's diagonal portion
3073    to be its local rows and the corresponding columns (a square submatrix);
3074    each processor's off-diagonal portion encompasses the remainder of the
3075    local matrix (a rectangular submatrix).
3076 
3077    The user can specify preallocated storage for the diagonal part of
3078    the local submatrix with either d_nz or d_nnz (not both).  Set
3079    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3080    memory allocation.  Likewise, specify preallocated storage for the
3081    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3082 
3083    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3084    the figure below we depict these three local rows and all columns (0-11).
3085 
3086 .vb
3087            0 1 2 3 4 5 6 7 8 9 10 11
3088           --------------------------
3089    row 3  |o o o d d d o o o o  o  o
3090    row 4  |o o o d d d o o o o  o  o
3091    row 5  |o o o d d d o o o o  o  o
3092           --------------------------
3093 .ve
3094 
3095    Thus, any entries in the d locations are stored in the d (diagonal)
3096    submatrix, and any entries in the o locations are stored in the
3097    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3098    stored simply in the MATSEQBAIJ format for compressed row storage.
3099 
3100    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3101    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3102    In general, for PDE problems in which most nonzeros are near the diagonal,
3103    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3104    or you will get TERRIBLE performance; see the users' manual chapter on
3105    matrices.
3106 
3107    You can call MatGetInfo() to get information on how effective the preallocation was;
3108    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3109    You can also run with the option -info and look for messages with the string
3110    malloc in them to see if additional memory allocation was needed.
3111 
3112    Level: intermediate
3113 
3114 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3115 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate the object and require all ranks to agree on the block size */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation; silently a no-op if B is not an MPIBAIJ-family type */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3127 
3128 /*@C
3129    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3130    (block compressed row).  For good matrix assembly performance
3131    the user should preallocate the matrix storage by setting the parameters
3132    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3133    performance can be increased by more than a factor of 50.
3134 
3135    Collective
3136 
3137    Input Parameters:
3138 +  comm - MPI communicator
3139 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3140           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3141 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3142            This value should be the same as the local size used in creating the
3143            y vector for the matrix-vector product y = Ax.
3144 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3145            This value should be the same as the local size used in creating the
3146            x vector for the matrix-vector product y = Ax.
3147 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3148 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3149 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3150            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3155 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3156            submatrix (same for all local rows).
3157 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3158            off-diagonal portion of the local submatrix (possibly different for
3159            each block row) or NULL.
3160 
3161    Output Parameter:
3162 .  A - the matrix
3163 
3164    Options Database Keys:
3165 +   -mat_block_size - size of the blocks to use
3166 -   -mat_use_hash_table <fact>
3167 
3168    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3169    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3170    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3171 
3172    Notes:
3173    If the *_nnz parameter is given then the *_nz parameter is ignored
3174 
   A nonzero block is any block that has 1 or more nonzeros in it
3176 
3177    The user MUST specify either the local or global matrix dimensions
3178    (possibly both).
3179 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3182 
3183    Storage Information:
3184    For a square global matrix we define each processor's diagonal portion
3185    to be its local rows and the corresponding columns (a square submatrix);
3186    each processor's off-diagonal portion encompasses the remainder of the
3187    local matrix (a rectangular submatrix).
3188 
3189    The user can specify preallocated storage for the diagonal part of
3190    the local submatrix with either d_nz or d_nnz (not both).  Set
3191    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3192    memory allocation.  Likewise, specify preallocated storage for the
3193    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3194 
3195    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3196    the figure below we depict these three local rows and all columns (0-11).
3197 
3198 .vb
3199            0 1 2 3 4 5 6 7 8 9 10 11
3200           --------------------------
3201    row 3  |o o o d d d o o o o  o  o
3202    row 4  |o o o d d d o o o o  o  o
3203    row 5  |o o o d d d o o o o  o  o
3204           --------------------------
3205 .ve
3206 
3207    Thus, any entries in the d locations are stored in the d (diagonal)
3208    submatrix, and any entries in the o locations are stored in the
3209    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3210    stored simply in the MATSEQBAIJ format for compressed row storage.
3211 
3212    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3213    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3214    In general, for PDE problems in which most nonzeros are near the diagonal,
3215    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3216    or you will get TERRIBLE performance; see the users' manual chapter on
3217    matrices.
3218 
3219    Level: intermediate
3220 
3221 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3222 @*/
3223 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3224 {
3225   PetscErrorCode ierr;
3226   PetscMPIInt    size;
3227 
3228   PetscFunctionBegin;
3229   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3230   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3231   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3232   if (size > 1) {
3233     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3234     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3235   } else {
3236     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3237     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3238   }
3239   PetscFunctionReturn(0);
3240 }
3241 
/*
   MatDuplicate_MPIBAIJ - Creates a new matrix of the same type and parallel layout as
   matin, copying all size/ownership metadata and (per cpvalues) the numerical values
   of the diagonal (A) and off-diagonal (B) sequential blocks.
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = NULL;
  /* create an empty matrix of the same type; MatSetType installs a fresh Mat_MPIBAIJ in mat->data */
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* the duplicate is immediately usable: mark it preallocated and assembled */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size bookkeeping (counts of bs x bs blocks, locally and globally) */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share (reference-count) the row/column layouts rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-matrix scratch state starts empty in the duplicate */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: copy the configuration but not the table itself (rebuilt on demand) */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* a->rangebs was allocated by MatCreate_MPIBAIJ during MatSetType above */
  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;

  /* copy the local-to-global column map of the off-diagonal part, if it exists */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = NULL;

  /* fresh block stash; duplicate the communication machinery for MatMult */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* duplicate the diagonal and off-diagonal sequential blocks (values copied per cpvalues) */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3324 
/*
   MatLoad_MPIBAIJ_Binary - Reads a matrix stored in PETSc binary (AIJ/CSR) format
   into a parallel block matrix. Used for both MPIBAIJ and MPISBAIJ matrices: the
   preallocation scan below handles either type, and only the matching
   Mat*SetPreallocation try-method actually fires.
*/
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
  PetscInt       *rowidxs,*colidxs,rs,cs,ce;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header: classid, global rows, global cols, total nonzeros */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
  if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
  if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set local sizes if not set already; for a square matrix mirror whichever one the caller supplied */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);CHKERRQ(ierr);
  mbs = m/bs; nbs = n/bs;  /* local row/column counts measured in bs x bs blocks */

  /* read in row lengths and build row indices (rowidxs becomes a local CSR row-pointer array) */
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* cross-check: the row lengths summed over all ranks must equal the header nonzero count */
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);

  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij,done;
    PetscInt   *d_nnz,*o_nnz;

    ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
    ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
    /* for each local block row i, count distinct bs x bs blocks touched in the
       diagonal range [cs,ce) (via the bit set) and outside it (via the hash set) */
    for (i=0; i<mbs; i++) {
      ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
      ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
      for (k=0; k<bs; k++) {
        PetscInt row = bs*i + k;
        for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
          PetscInt col = colidxs[j];
          /* NOTE(review): for SBAIJ this compares the global column index against the
             *local* row index, so on ranks with rs > 0 some lower-triangle entries are
             also counted; that can only over-preallocate, never under — verify intent */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
            } else {
              ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
    ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
    /* try-methods: only the call matching the actual matrix type takes effect */
    ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
  }

  /* store matrix values, one scalar row at a time, via the type's setvalues method */
  for (i=0; i<m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
    ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3423 
3424 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3425 {
3426   PetscErrorCode ierr;
3427   PetscBool      isbinary;
3428 
3429   PetscFunctionBegin;
3430   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3431   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3432   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3433   PetscFunctionReturn(0);
3434 }
3435 
3436 /*@
3437    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3438 
3439    Input Parameters:
3440 +  mat  - the matrix
3441 -  fact - factor
3442 
3443    Not Collective, each process can use a different factor
3444 
3445    Level: advanced
3446 
3447   Notes:
3448    This can also be set by the command line option: -mat_use_hash_table <fact>
3449 
3450 .seealso: MatSetOption()
3451 @*/
3452 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3453 {
3454   PetscErrorCode ierr;
3455 
3456   PetscFunctionBegin;
3457   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3458   PetscFunctionReturn(0);
3459 }
3460 
3461 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3462 {
3463   Mat_MPIBAIJ *baij;
3464 
3465   PetscFunctionBegin;
3466   baij          = (Mat_MPIBAIJ*)mat->data;
3467   baij->ht_fact = fact;
3468   PetscFunctionReturn(0);
3469 }
3470 
3471 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3472 {
3473   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3474   PetscBool      flg;
3475   PetscErrorCode ierr;
3476 
3477   PetscFunctionBegin;
3478   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3479   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3480   if (Ad)     *Ad     = a->A;
3481   if (Ao)     *Ao     = a->B;
3482   if (colmap) *colmap = a->garray;
3483   PetscFunctionReturn(0);
3484 }
3485 
3486 /*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
3488 */
3489 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3490 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3491 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3492 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3493 #endif
3494 
3495 /*@C
3496   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3497 
3498   Collective on Mat
3499 
3500   Input Parameters:
3501 + mat - the matrix
3502 . min - number of input rows
3503 . im - input rows
3504 . nin - number of input columns
3505 . in - input columns
3506 . v - numerical values input
3507 - addvin - INSERT_VALUES or ADD_VALUES
3508 
3509   Notes:
3510     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3511 
3512   Level: advanced
3513 
3514 .seealso:   MatSetValuesBlocked()
3515 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;   /* scratch buffer holding one bs x bs block */
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  /* lazily create the one-block scratch buffer, cached on the matrix */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows (row-oriented) or columns of a block within v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative block-row indices are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row owned by this process */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the (i,j) bs x bs block from v into contiguous scratch storage */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal portion: insert into the local A block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
        else {
          /* off-diagonal portion: translate the global block column through the colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;   /* colmap stores 1-based scalar offsets; recover the block index */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* unknown column in an assembled matrix: disassemble so new nonzeros can be inserted */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash the values for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3626 
3627 /*@
3628      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3629          CSR format the local rows.
3630 
3631    Collective
3632 
3633    Input Parameters:
3634 +  comm - MPI communicator
3635 .  bs - the block size, only a block size of 1 is supported
3636 .  m - number of local rows (Cannot be PETSC_DECIDE)
3637 .  n - This value should be the same as the local size used in creating the
3638        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3639        calculated if N is given) For square matrices n is almost always m.
3640 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3641 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3642 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of nonzero blocks in block row row-1 of the matrix
3643 .   j - column indices
3644 -   a - matrix values
3645 
3646    Output Parameter:
3647 .   mat - the matrix
3648 
3649    Level: intermediate
3650 
3651    Notes:
3652        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3653      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3654      called this routine. Use MatCreateMPIBAIJWithSplitArrays() to avoid needing to copy the arrays.
3655 
3656      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3657      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3658      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3659      with column-major ordering within blocks.
3660 
3661        The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
3662 
3663 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3664           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3665 @*/
3666 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3667 {
3668   PetscErrorCode ierr;
3669 
3670   PetscFunctionBegin;
     /* Validate the caller-supplied block-CSR metadata before creating anything:
        the row-offset array must start at 0 and the local row count must be explicit */
3671   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3672   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3673   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3674   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3675   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3676   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3677   ierr = MatSetUp(*mat);CHKERRQ(ierr);
     /* The input blocks are stored column-major within each bs x bs block (see the
        manpage Notes above), so switch to column-oriented insertion while the CSR
        values are copied in, then restore the default row orientation */
3678   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3679   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3680   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3681   PetscFunctionReturn(0);
3682 }
3683 
/*
   MatCreateMPIMatConcatenateSeqMat_MPIBAIJ - Concatenates the per-rank sequential BAIJ
   matrices inmat (stacked by rank) into one parallel BAIJ matrix *outmat on comm.

   Input Parameters:
+  comm  - the communicator over which the result is distributed
.  inmat - this rank's sequential BAIJ matrix; its rows become this rank's local rows
.  n     - local column size of *outmat (or PETSC_DECIDE to split N by block)
-  scall - MAT_INITIAL_MATRIX to create *outmat (symbolic + numeric) or
           MAT_REUSE_MATRIX to only refill an existing *outmat (numeric only)

   Output Parameter:
.  outmat - the assembled parallel matrix
*/
3684 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3685 {
3686   PetscErrorCode ierr;
3687   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3688   PetscInt       *indx;
3689   PetscScalar    *values;
3690 
3691   PetscFunctionBegin;
3692   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3693   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase: count block nonzeros per row and preallocate */
3694     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3695     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3696     PetscInt       *bindx,rmax=a->rmax,j;
3697     PetscMPIInt    rank,size;
3698 
3699     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3700     mbs = m/bs; Nbs = N/cbs;  /* block-row and block-column counts of inmat */
3701     if (n == PETSC_DECIDE) {
3702       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
3703     }
3704     nbs = n/cbs;
3705 
3706     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);  /* scratch for one row's block-column indices */
3707     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3708 
3709     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
     /* bug fix: this previously called MPI_Comm_rank(comm,&size), storing the rank in
        size, which broke the sum(nbs) == Nbs sanity check below on multi-rank comms */
3710     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3711     if (rank == size-1) {
3712       /* Check sum(nbs) = Nbs */
3713       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3714     }
3715 
3716     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3717     for (i=0; i<mbs; i++) {
3718       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3719       nnz = nnz/bs;  /* convert point nonzeros to block nonzeros */
3720       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3721       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3722       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3723     }
3724     ierr = PetscFree(bindx);CHKERRQ(ierr);
3725 
3726     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3727     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3728     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3729     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
     /* MATBAIJ resolves to seq or mpi depending on comm size; only the matching call takes effect */
3730     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3731     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3732     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3733     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3734   }
3735 
3736   /* numeric phase: copy inmat's rows into the owned rows of *outmat */
3737   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3738   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3739 
3740   for (i=0; i<m; i++) {
3741     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3742     Ii   = i + rstart;
3743     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3744     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3745   }
3746   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3747   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3748   PetscFunctionReturn(0);
3749 }
3750