xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 0e9bae810fdaeb60e2713eaa8ddb89f42e079fd1)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++){
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non-scalable way since the
81    length of colmap equals the global number of block columns.
82 */
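/*
   Illustrative example (values assumed, not part of the original source):
   with bs = 2 and an off-diagonal part whose global block columns are
   garray = {3,7}, the non-CTABLE branch below yields

       colmap[3] = 0*bs + 1 = 1;    colmap[7] = 1*bs + 1 = 3;

   i.e. colmap[global block column] holds 1 + the first local point column of
   that block column within B, while a value of 0 means the block column is
   not present on this process.
*/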
83 #undef __FUNCT__
84 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
85 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++){
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
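/*
   Worked example of the addressing used by the two macros above (purely
   illustrative, bs assumed to be 2 so bs2 = 4): a point entry at
   (row,col) = (5,3) belongs to block row brow = 5/2 = 2 and block column
   bcol = 3/2 = 1, at local position ridx = 5%2 = 1, cidx = 3%2 = 1.
   Blocks are stored column-major, so the scalar lands at offset
   bs*cidx + ridx = 3 within the bs2 scalars reserved for that block.
*/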
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig){
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         } else if (in[j] < 0) continue;
229 #if defined(PETSC_USE_DEBUG)
230         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
231 #endif
232         else {
233           if (mat->was_assembled) {
234             if (!baij->colmap) {
235               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
236             }
237 #if defined (PETSC_USE_CTABLE)
238             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
239             col  = col - 1;
240 #else
241             col = baij->colmap[in[j]/bs] - 1;
242 #endif
243             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
244               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
245               col =  in[j];
246               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
247               B = baij->B;
248               b = (Mat_SeqBAIJ*)(B)->data;
249               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
250               ba=b->a;
251             } else col += in[j]%bs;
252           } else col = in[j];
253           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
254           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
260       if (!baij->donotstash) {
261         mat->assembled = PETSC_FALSE;
262         if (roworiented) {
263           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
264         } else {
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
275 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
276 {
277   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
278   const PetscScalar *value;
279   MatScalar         *barray=baij->barray;
280   PetscBool         roworiented = baij->roworiented;
281   PetscErrorCode    ierr;
282   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
283   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
284   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
285 
286   PetscFunctionBegin;
287   if(!barray) {
288     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
289     baij->barray = barray;
290   }
291 
292   if (roworiented) {
293     stepval = (n-1)*bs;
294   } else {
295     stepval = (m-1)*bs;
296   }
297   for (i=0; i<m; i++) {
298     if (im[i] < 0) continue;
299 #if defined(PETSC_USE_DEBUG)
300     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
301 #endif
302     if (im[i] >= rstart && im[i] < rend) {
303       row = im[i] - rstart;
304       for (j=0; j<n; j++) {
305         /* If n == 1 (row-oriented) or m == 1 (column-oriented), the block is already contiguous in v, so a copy is not required */
306         if ((roworiented) && (n == 1)) {
307           barray = (MatScalar*)v + i*bs2;
308         } else if((!roworiented) && (m == 1)) {
309           barray = (MatScalar*)v + j*bs2;
310         } else { /* Here a copy is required */
311           if (roworiented) {
312             value = v + (i*(stepval+bs) + j)*bs;
313           } else {
314 	    value = v + (j*(stepval+bs) + i)*bs;
315           }
316           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
317             for (jj=0; jj<bs; jj++) {
318               barray[jj]  = value[jj];
319             }
320             barray += bs;
321           }
322           barray -= bs2;
323         }
324 
325         if (in[j] >= cstart && in[j] < cend){
326           col  = in[j] - cstart;
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         }
329         else if (in[j] < 0) continue;
330 #if defined(PETSC_USE_DEBUG)
331         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
332 #endif
333         else {
334           if (mat->was_assembled) {
335             if (!baij->colmap) {
336               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
337             }
338 
339 #if defined(PETSC_USE_DEBUG)
340 #if defined (PETSC_USE_CTABLE)
341             { PetscInt data;
342               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
343               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
344             }
345 #else
346             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
347 #endif
348 #endif
349 #if defined (PETSC_USE_CTABLE)
350 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
351             col  = (col - 1)/bs;
352 #else
353             col = (baij->colmap[in[j]] - 1)/bs;
354 #endif
355             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
356               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
357               col =  in[j];
358             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
359           }
360           else col = in[j];
361           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
362         }
363       }
364     } else {
365       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
366       if (!baij->donotstash) {
367         if (roworiented) {
368           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         } else {
370           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         }
372       }
373     }
374   }
375   PetscFunctionReturn(0);
376 }
377 
378 #define HASH_KEY 0.6180339887
379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
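/*
   The macro implements multiplicative hashing with the golden-ratio constant
   HASH_KEY = 1/phi: h = floor(size*frac(key*HASH_KEY)).  A worked example with
   assumed values key = 7 and size = 101:

       tmp = 7*0.6180339887 = 4.3262...;   frac(tmp) = 0.3262...;
       h   = (PetscInt)(101*0.3262...)     = 32;

   Collisions are resolved by the linear-probing loops in the routines below.
*/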
382 #undef __FUNCT__
383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
385 {
386   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
387   PetscBool      roworiented = baij->roworiented;
388   PetscErrorCode ierr;
389   PetscInt       i,j,row,col;
390   PetscInt       rstart_orig=mat->rmap->rstart;
391   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
392   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
393   PetscReal      tmp;
394   MatScalar      **HD = baij->hd,value;
395 #if defined(PETSC_USE_DEBUG)
396   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
397 #endif
398 
399   PetscFunctionBegin;
400   if (v) PetscValidScalarPointer(v,6);
401   for (i=0; i<m; i++) {
402 #if defined(PETSC_USE_DEBUG)
403     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
404     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
405 #endif
406     row = im[i];
407     if (row >= rstart_orig && row < rend_orig) {
408       for (j=0; j<n; j++) {
409         col = in[j];
410         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411         /* Look up into the Hash Table */
412         key = (row/bs)*Nbs+(col/bs)+1;
413         h1  = HASH(size,key,tmp);
414 
415 
416         idx = h1;
417 #if defined(PETSC_USE_DEBUG)
418         insert_ct++;
419         total_ct++;
420         if (HT[idx] != key) {
421           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
422           if (idx == size) {
423             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
424             if (idx == h1) {
425               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
426             }
427           }
428         }
429 #else
430         if (HT[idx] != key) {
431           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
432           if (idx == size) {
433             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
434             if (idx == h1) {
435               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436             }
437           }
438         }
439 #endif
440         /* A HASH table entry is found, so insert the values at the correct address */
441         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
442         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
443       }
444     } else {
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454 #if defined(PETSC_USE_DEBUG)
455   baij->ht_total_ct = total_ct;
456   baij->ht_insert_ct = insert_ct;
457 #endif
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
464 {
465   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
466   PetscBool         roworiented = baij->roworiented;
467   PetscErrorCode    ierr;
468   PetscInt          i,j,ii,jj,row,col;
469   PetscInt          rstart=baij->rstartbs;
470   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
471   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
472   PetscReal         tmp;
473   MatScalar         **HD = baij->hd,*baij_a;
474   const PetscScalar *v_t,*value;
475 #if defined(PETSC_USE_DEBUG)
476   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
477 #endif
478 
479   PetscFunctionBegin;
480 
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487 #if defined(PETSC_USE_DEBUG)
488     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     row   = im[i];
492     v_t   = v + i*nbs2;
493     if (row >= rstart && row < rend) {
494       for (j=0; j<n; j++) {
495         col = in[j];
496 
497         /* Look up into the Hash Table */
498         key = row*Nbs+col+1;
499         h1  = HASH(size,key,tmp);
500 
501         idx = h1;
502 #if defined(PETSC_USE_DEBUG)
503         total_ct++;
504         insert_ct++;
505         if (HT[idx] != key) {
506           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
507           if (idx == size) {
508             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
509             if (idx == h1) {
510               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
511             }
512           }
513         }
514 #else
515         if (HT[idx] != key) {
516           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
517           if (idx == size) {
518             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
519             if (idx == h1) {
520               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
521             }
522           }
523         }
524 #endif
525         baij_a = HD[idx];
526         if (roworiented) {
527           /*value = v + i*(stepval+bs)*bs + j*bs;*/
528           /* value = v + (i*(stepval+bs)+j)*bs; */
529           value = v_t;
530           v_t  += bs;
531           if (addv == ADD_VALUES) {
532             for (ii=0; ii<bs; ii++,value+=stepval) {
533               for (jj=ii; jj<bs2; jj+=bs) {
534                 baij_a[jj]  += *value++;
535               }
536             }
537           } else {
538             for (ii=0; ii<bs; ii++,value+=stepval) {
539               for (jj=ii; jj<bs2; jj+=bs) {
540                 baij_a[jj]  = *value++;
541               }
542             }
543           }
544         } else {
545           value = v + j*(stepval+bs)*bs + i*bs;
546           if (addv == ADD_VALUES) {
547             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
548               for (jj=0; jj<bs; jj++) {
549                 baij_a[jj]  += *value++;
550               }
551             }
552           } else {
553             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
554               for (jj=0; jj<bs; jj++) {
555                 baij_a[jj]  = *value++;
556               }
557             }
558           }
559         }
560       }
561     } else {
562       if (!baij->donotstash) {
563         if (roworiented) {
564           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
565         } else {
566           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         }
568       }
569     }
570   }
571 #if defined(PETSC_USE_DEBUG)
572   baij->ht_total_ct = total_ct;
573   baij->ht_insert_ct = insert_ct;
574 #endif
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatGetValues_MPIBAIJ"
580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
581 {
582   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
583   PetscErrorCode ierr;
584   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
585   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
586 
587   PetscFunctionBegin;
588   for (i=0; i<m; i++) {
589     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
590     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
591     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
592       row = idxm[i] - bsrstart;
593       for (j=0; j<n; j++) {
594         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
595         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
596         if (idxn[j] >= bscstart && idxn[j] < bscend){
597           col = idxn[j] - bscstart;
598           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599         } else {
600           if (!baij->colmap) {
601             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
602           }
603 #if defined (PETSC_USE_CTABLE)
604           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
605           data --;
606 #else
607           data = baij->colmap[idxn[j]/bs]-1;
608 #endif
609           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
610           else {
611             col  = data + idxn[j]%bs;
612             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
613           }
614         }
615       }
616     } else {
617       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
618     }
619   }
620  PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatNorm_MPIBAIJ"
625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
626 {
627   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
628   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
629   PetscErrorCode ierr;
630   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
631   PetscReal      sum = 0.0;
632   MatScalar      *v;
633 
634   PetscFunctionBegin;
635   if (baij->size == 1) {
636     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
637   } else {
638     if (type == NORM_FROBENIUS) {
639       v = amat->a;
640       nz = amat->nz*bs2;
641       for (i=0; i<nz; i++) {
642 #if defined(PETSC_USE_COMPLEX)
643         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
644 #else
645         sum += (*v)*(*v); v++;
646 #endif
647       }
648       v = bmat->a;
649       nz = bmat->nz*bs2;
650       for (i=0; i<nz; i++) {
651 #if defined(PETSC_USE_COMPLEX)
652         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
653 #else
654         sum += (*v)*(*v); v++;
655 #endif
656       }
657       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
658       *nrm = PetscSqrtReal(*nrm);
659     } else if (type == NORM_1) { /* max column sum */
660       PetscReal *tmp,*tmp2;
661       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
662       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
663       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
664       v = amat->a; jj = amat->j;
665       for (i=0; i<amat->nz; i++) {
666         for (j=0; j<bs; j++){
667           col = bs*(cstart + *jj) + j; /* column index */
668           for (row=0; row<bs; row++){
669             tmp[col] += PetscAbsScalar(*v);  v++;
670           }
671         }
672         jj++;
673       }
674       v = bmat->a; jj = bmat->j;
675       for (i=0; i<bmat->nz; i++) {
676         for (j=0; j<bs; j++){
677           col = bs*garray[*jj] + j;
678           for (row=0; row<bs; row++){
679             tmp[col] += PetscAbsScalar(*v); v++;
680           }
681         }
682         jj++;
683       }
684       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
685       *nrm = 0.0;
686       for (j=0; j<mat->cmap->N; j++) {
687         if (tmp2[j] > *nrm) *nrm = tmp2[j];
688       }
689       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
690     } else if (type == NORM_INFINITY) { /* max row sum */
691       PetscReal *sums;
692       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
693       sum = 0.0;
694       for (j=0; j<amat->mbs; j++) {
695         for (row=0; row<bs; row++) sums[row] = 0.0;
696         v = amat->a + bs2*amat->i[j];
697         nz = amat->i[j+1]-amat->i[j];
698         for (i=0; i<nz; i++) {
699           for (col=0; col<bs; col++){
700             for (row=0; row<bs; row++){
701               sums[row] += PetscAbsScalar(*v); v++;
702             }
703           }
704         }
705         v = bmat->a + bs2*bmat->i[j];
706         nz = bmat->i[j+1]-bmat->i[j];
707         for (i=0; i<nz; i++) {
708           for (col=0; col<bs; col++){
709             for (row=0; row<bs; row++){
710               sums[row] += PetscAbsScalar(*v); v++;
711             }
712           }
713         }
714         for (row=0; row<bs; row++){
715           if (sums[row] > sum) sum = sums[row];
716         }
717       }
718       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
719       ierr = PetscFree(sums);CHKERRQ(ierr);
720     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 /*
726   Creates the hash table and fills it in.
727   This table is created only once.
728   If new entries need to be added to the matrix
729   then the hash table has to be destroyed and
730   recreated.
731 */
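/*
   Sketch of the lookup this table supports (illustrative only): a block with
   global block coordinates (row,col) is filed under

       key = row*Nbs + col + 1;       the +1 lets 0 mark an empty slot
       h1  = HASH(ht_size,key,tmp);   followed by linear probing (h1+k)%ht_size

   and HD[slot] points at the bs2 scalars of that block inside A or B, so that
   MatSetValues_MPIBAIJ_HT() can insert values without a per-row binary search.
*/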
732 #undef __FUNCT__
733 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
734 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
735 {
736   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
737   Mat            A = baij->A,B=baij->B;
738   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
739   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
740   PetscErrorCode ierr;
741   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
742   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
743   PetscInt       *HT,key;
744   MatScalar      **HD;
745   PetscReal      tmp;
746 #if defined(PETSC_USE_INFO)
747   PetscInt       ct=0,max=0;
748 #endif
749 
750   PetscFunctionBegin;
751   if (baij->ht) PetscFunctionReturn(0);
752 
753   baij->ht_size = (PetscInt)(factor*nz);
754   ht_size       = baij->ht_size;
755 
756   /* Allocate Memory for Hash Table */
757   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
758   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
759   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
760   HD   = baij->hd;
761   HT   = baij->ht;
762 
763   /* Loop Over A */
764   for (i=0; i<a->mbs; i++) {
765     for (j=ai[i]; j<ai[i+1]; j++) {
766       row = i+rstart;
767       col = aj[j]+cstart;
768 
769       key = row*Nbs + col + 1;
770       h1  = HASH(ht_size,key,tmp);
771       for (k=0; k<ht_size; k++){
772         if (!HT[(h1+k)%ht_size]) {
773           HT[(h1+k)%ht_size] = key;
774           HD[(h1+k)%ht_size] = a->a + j*bs2;
775           break;
776 #if defined(PETSC_USE_INFO)
777         } else {
778           ct++;
779 #endif
780         }
781       }
782 #if defined(PETSC_USE_INFO)
783       if (k> max) max = k;
784 #endif
785     }
786   }
787   /* Loop Over B */
788   for (i=0; i<b->mbs; i++) {
789     for (j=bi[i]; j<bi[i+1]; j++) {
790       row = i+rstart;
791       col = garray[bj[j]];
792       key = row*Nbs + col + 1;
793       h1  = HASH(ht_size,key,tmp);
794       for (k=0; k<ht_size; k++){
795         if (!HT[(h1+k)%ht_size]) {
796           HT[(h1+k)%ht_size] = key;
797           HD[(h1+k)%ht_size] = b->a + j*bs2;
798           break;
799 #if defined(PETSC_USE_INFO)
800         } else {
801           ct++;
802 #endif
803         }
804       }
805 #if defined(PETSC_USE_INFO)
806       if (k> max) max = k;
807 #endif
808     }
809   }
810 
811   /* Print Summary */
812 #if defined(PETSC_USE_INFO)
813   for (i=0,j=0; i<ht_size; i++) {
814     if (HT[i]) {j++;}
815   }
816   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
817 #endif
818   PetscFunctionReturn(0);
819 }
820 
821 #undef __FUNCT__
822 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
823 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
824 {
825   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
826   PetscErrorCode ierr;
827   PetscInt       nstash,reallocs;
828   InsertMode     addv;
829 
830   PetscFunctionBegin;
831   if (baij->donotstash || mat->nooffprocentries) {
832     PetscFunctionReturn(0);
833   }
834 
835   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES, not a mixture of the two */
836   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
837   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
838   mat->insertmode = addv; /* in case this processor had no cache */
839 
840   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
841   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
842   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
843   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
844   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
845   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
851 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
852 {
853   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
854   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
855   PetscErrorCode ierr;
856   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
857   PetscInt       *row,*col;
858   PetscBool      r1,r2,r3,other_disassembled;
859   MatScalar      *val;
860   InsertMode     addv = mat->insertmode;
861   PetscMPIInt    n;
862 
863   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
864   PetscFunctionBegin;
865   if (!baij->donotstash && !mat->nooffprocentries) {
866     while (1) {
867       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
868       if (!flg) break;
869 
870       for (i=0; i<n;) {
871         /* Now identify the consecutive vals belonging to the same row */
872         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
873         if (j < n) ncols = j-i;
874         else       ncols = n-i;
875         /* Now assemble all these values with a single function call */
876         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
877         i = j;
878       }
879     }
880     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
881     /* Now process the block-stash. Since the values are stashed column-oriented,
882        set the roworiented flag to column oriented, and after MatSetValues()
883        restore the original flags */
884     r1 = baij->roworiented;
885     r2 = a->roworiented;
886     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
887     baij->roworiented = PETSC_FALSE;
888     a->roworiented    = PETSC_FALSE;
889     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
890     while (1) {
891       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
892       if (!flg) break;
893 
894       for (i=0; i<n;) {
895         /* Now identify the consecutive vals belonging to the same row */
896         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
897         if (j < n) ncols = j-i;
898         else       ncols = n-i;
899         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
900         i = j;
901       }
902     }
903     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
904     baij->roworiented = r1;
905     a->roworiented    = r2;
906     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
907   }
908 
909   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
910   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
911 
912   /* determine if any processor has disassembled, if so we must
913      also disassemble ourselves, in order that we may reassemble. */
914   /*
915      if the nonzero structure of submatrix B cannot change then we know that
916      no processor disassembled, thus we can skip this step
917   */
918   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
919     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
920     if (mat->was_assembled && !other_disassembled) {
921       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
922     }
923   }
924 
925   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
926     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
927   }
928   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
929   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
930   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
931 
932 #if defined(PETSC_USE_INFO)
933   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
934     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
935     baij->ht_total_ct  = 0;
936     baij->ht_insert_ct = 0;
937   }
938 #endif
939   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
940     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
941     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
942     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
943   }
944 
945   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
946   baij->rowvalues = 0;
947   PetscFunctionReturn(0);
948 }
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
952 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
953 {
954   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
955   PetscErrorCode    ierr;
956   PetscMPIInt       size = baij->size,rank = baij->rank;
957   PetscInt          bs = mat->rmap->bs;
958   PetscBool         iascii,isdraw;
959   PetscViewer       sviewer;
960   PetscViewerFormat format;
961 
962   PetscFunctionBegin;
963   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
964   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
965   if (iascii) {
966     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
967     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
968       MatInfo info;
969       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
970       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
971       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
972       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
973              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
974       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
975       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
976       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
977       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
978       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
979       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
980       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
981       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     } else if (format == PETSC_VIEWER_ASCII_INFO) {
984       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
987       PetscFunctionReturn(0);
988     }
989   }
990 
991   if (isdraw) {
992     PetscDraw       draw;
993     PetscBool  isnull;
994     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
995     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
996   }
997 
998   if (size == 1) {
999     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1000     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1001   } else {
1002     /* assemble the entire matrix onto first processor. */
1003     Mat         A;
1004     Mat_SeqBAIJ *Aloc;
1005     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1006     MatScalar   *a;
1007 
1008     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1009     /* Perhaps this should be the type of mat? */
1010     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1011     if (!rank) {
1012       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1013     } else {
1014       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1015     }
1016     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1017     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1018     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1019     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1020 
1021     /* copy over the A part */
1022     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1023     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1024     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1025 
1026     for (i=0; i<mbs; i++) {
1027       rvals[0] = bs*(baij->rstartbs + i);
1028       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1029       for (j=ai[i]; j<ai[i+1]; j++) {
1030         col = (baij->cstartbs+aj[j])*bs;
1031         for (k=0; k<bs; k++) {
1032           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1033           col++; a += bs;
1034         }
1035       }
1036     }
1037     /* copy over the B part */
1038     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1039     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1040     for (i=0; i<mbs; i++) {
1041       rvals[0] = bs*(baij->rstartbs + i);
1042       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1043       for (j=ai[i]; j<ai[i+1]; j++) {
1044         col = baij->garray[aj[j]]*bs;
1045         for (k=0; k<bs; k++) {
1046           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1047           col++; a += bs;
1048         }
1049       }
1050     }
1051     ierr = PetscFree(rvals);CHKERRQ(ierr);
1052     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1054     /*
1055        Everyone has to call to draw the matrix since the graphics waits are
1056        synchronized across all processors that share the PetscDraw object
1057     */
1058     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1059     if (!rank) {
1060       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1061     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1062       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1063       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1064     }
1065     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1066     ierr = MatDestroy(&A);CHKERRQ(ierr);
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1073 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1074 {
1075   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1076   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1077   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1078   PetscErrorCode ierr;
1079   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1080   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1081   int            fd;
1082   PetscScalar    *column_values;
1083   FILE           *file;
1084   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1085   PetscInt       message_count,flowcontrolcount;
1086 
1087   PetscFunctionBegin;
1088   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1089   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1090   nz   = bs2*(A->nz + B->nz);
1091   rlen = mat->rmap->n;
1092   if (!rank) {
1093     header[0] = MAT_FILE_CLASSID;
1094     header[1] = mat->rmap->N;
1095     header[2] = mat->cmap->N;
1096     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1097     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1098     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1099     /* get largest number of rows any processor has */
1100     range = mat->rmap->range;
1101     for (i=1; i<size; i++) {
1102       rlen = PetscMax(rlen,range[i+1] - range[i]);
1103     }
1104   } else {
1105     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1106   }
1107 
1108   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1109   /* compute the number of block nonzeros in each block row */
1110   for (i=0; i<a->mbs; i++) {
1111     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1112   }
1113   /* store the row lengths to the file */
1114   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1115   if (!rank) {
1116     MPI_Status status;
1117     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1118     rlen  = (range[1] - range[0])/bs;
1119     for (i=0; i<rlen; i++) {
1120       for (j=0; j<bs; j++) {
1121         row_lens[i*bs+j] = bs*crow_lens[i];
1122       }
1123     }
1124     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1125     for (i=1; i<size; i++) {
1126       rlen = (range[i+1] - range[i])/bs;
1127       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1128       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1129       for (k=0; k<rlen; k++) {
1130 	for (j=0; j<bs; j++) {
1131 	  row_lens[k*bs+j] = bs*crow_lens[k];
1132 	}
1133       }
1134       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1135     }
1136     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1137     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1138   } else {
1139     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1140     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1141     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1142   }
1143   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1144 
1145   /* load up the local column indices. Include them for every point row, not just once per block row, since process 0 does not
1146      have the information needed to expand a block row into its point rows. This requires more communication, but still no more
1147      than the communication needed for the nonzero values */
1148   nzmax = nz; /* space the largest processor needs */
1149   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1150   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1151   cnt  = 0;
1152   for (i=0; i<a->mbs; i++) {
1153     pcnt = cnt;
1154     for (j=B->i[i]; j<B->i[i+1]; j++) {
1155       if ( (col = garray[B->j[j]]) > cstart) break;
1156       for (l=0; l<bs; l++) {
1157 	column_indices[cnt++] = bs*col+l;
1158       }
1159     }
1160     for (k=A->i[i]; k<A->i[i+1]; k++) {
1161       for (l=0; l<bs; l++) {
1162         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1163       }
1164     }
1165     for (; j<B->i[i+1]; j++) {
1166       for (l=0; l<bs; l++) {
1167         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1168       }
1169     }
1170     len = cnt - pcnt;
1171     for (k=1; k<bs; k++) {
1172       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1173       cnt += len;
1174     }
1175   }
1176   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1177 
1178   /* store the columns to the file */
1179   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1180   if (!rank) {
1181     MPI_Status status;
1182     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1183     for (i=1; i<size; i++) {
1184       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1185       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1186       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1187       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1188     }
1189     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1190   } else {
1191     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1192     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1193     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1194     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1195   }
1196   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1197 
1198   /* load up the numerical values */
1199   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1200   cnt = 0;
1201   for (i=0; i<a->mbs; i++) {
1202     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1203     for (j=B->i[i]; j<B->i[i+1]; j++) {
1204       if ( garray[B->j[j]] > cstart) break;
1205       for (l=0; l<bs; l++) {
1206         for (ll=0; ll<bs; ll++) {
1207 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1208         }
1209       }
1210       cnt += bs;
1211     }
1212     for (k=A->i[i]; k<A->i[i+1]; k++) {
1213       for (l=0; l<bs; l++) {
1214         for (ll=0; ll<bs; ll++) {
1215           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1216         }
1217       }
1218       cnt += bs;
1219     }
1220     for (; j<B->i[i+1]; j++) {
1221       for (l=0; l<bs; l++) {
1222         for (ll=0; ll<bs; ll++) {
1223 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1224         }
1225       }
1226       cnt += bs;
1227     }
1228     cnt += (bs-1)*rlen;
1229   }
1230   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1231 
1232   /* store the column values to the file */
1233   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1234   if (!rank) {
1235     MPI_Status status;
1236     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1239       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1240       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1246     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1247     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1248     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1249   }
1250   ierr = PetscFree(column_values);CHKERRQ(ierr);
1251 
1252   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1253   if (file) {
1254     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatView_MPIBAIJ"
1261 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1262 {
1263   PetscErrorCode ierr;
1264   PetscBool      iascii,isdraw,issocket,isbinary;
1265 
1266   PetscFunctionBegin;
1267   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1268   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1269   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1270   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1271   if (iascii || isdraw || issocket) {
1272     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1273   } else if (isbinary) {
1274     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1275   } else {
1276     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1277   }
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1283 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1284 {
1285   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1286   PetscErrorCode ierr;
1287 
1288   PetscFunctionBegin;
1289 #if defined(PETSC_USE_LOG)
1290   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1291 #endif
1292   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1293   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1294   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1295   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1296 #if defined (PETSC_USE_CTABLE)
1297   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1298 #else
1299   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1300 #endif
1301   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1302   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1303   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1304   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1305   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1306   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1307   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1308   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1309 
1310   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1318   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1319   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatMult_MPIBAIJ"
1325 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1326 {
1327   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1328   PetscErrorCode ierr;
1329   PetscInt       nt;
1330 
1331   PetscFunctionBegin;
1332   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1333   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1334   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1335   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1336   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1338   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1339   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
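/*
   A minimal usage sketch (illustrative only, not part of this file's implementation):
   applying y = A x with an assembled MPIBAIJ matrix. The routine above overlaps the
   ghost-value scatter with the multiply by the diagonal block a->A before adding the
   off-diagonal contribution from a->B. ExampleMatMultUsage is a hypothetical helper name.
*/
#if 0
static PetscErrorCode ExampleMatMultUsage(Mat A)
{
  PetscErrorCode ierr;
  Vec            x,y;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);   /* x conforms to the columns, y to the rows */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);        /* dispatches to MatMult_MPIBAIJ() */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
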
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1345 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1346 {
1347   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1348   PetscErrorCode ierr;
1349 
1350   PetscFunctionBegin;
1351   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1352   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1353   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1354   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1360 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1361 {
1362   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1363   PetscErrorCode ierr;
1364   PetscBool      merged;
1365 
1366   PetscFunctionBegin;
1367   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1368   /* do nondiagonal part */
1369   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1370   if (!merged) {
1371     /* send it on its way */
1372     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1373     /* do local part */
1374     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1375     /* receive remote parts: note this assumes the values are not actually */
1376     /* inserted in yy until the next line */
1377     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1378   } else {
1379     /* do local part */
1380     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1381     /* send it on its way */
1382     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383     /* values actually were received in the Begin() but we need to call this nop */
1384     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1391 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1392 {
1393   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   /* do nondiagonal part */
1398   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1399   /* send it on its way */
1400   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1401   /* do local part */
1402   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1403   /* receive remote parts: note this assumes the values are not actually */
1404   /* inserted in zz until the next line, which is true for this implementation */
1405   /* but is perhaps not always true. */
1406   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 /*
1411   This only works correctly for square matrices where the subblock A->A is the
1412    diagonal block
1413 */
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1416 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1417 {
1418   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420 
1421   PetscFunctionBegin;
1422   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1423   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatScale_MPIBAIJ"
1429 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1430 {
1431   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1432   PetscErrorCode ierr;
1433 
1434   PetscFunctionBegin;
1435   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1436   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1442 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1443 {
1444   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1445   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1446   PetscErrorCode ierr;
1447   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1448   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1449   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1450 
1451   PetscFunctionBegin;
1452   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1453   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1454   mat->getrowactive = PETSC_TRUE;
1455 
1456   if (!mat->rowvalues && (idx || v)) {
1457     /*
1458         allocate enough space to hold information from the longest row.
1459     */
1460     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1461     PetscInt     max = 1,mbs = mat->mbs,tmp;
1462     for (i=0; i<mbs; i++) {
1463       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1464       if (max < tmp) { max = tmp; }
1465     }
1466     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1467   }
1468   lrow = row - brstart;
1469 
1470   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1471   if (!v)   {pvA = 0; pvB = 0;}
1472   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1473   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1474   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1475   nztot = nzA + nzB;
1476 
1477   cmap  = mat->garray;
1478   if (v  || idx) {
1479     if (nztot) {
1480       /* Sort by increasing column numbers, assuming A and B already sorted */
1481       PetscInt imark = -1;
1482       if (v) {
1483         *v = v_p = mat->rowvalues;
1484         for (i=0; i<nzB; i++) {
1485           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1486           else break;
1487         }
1488         imark = i;
1489         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1490         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1491       }
1492       if (idx) {
1493         *idx = idx_p = mat->rowindices;
1494         if (imark > -1) {
1495           for (i=0; i<imark; i++) {
1496             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1497           }
1498         } else {
1499           for (i=0; i<nzB; i++) {
1500             if (cmap[cworkB[i]/bs] < cstart)
1501               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1502             else break;
1503           }
1504           imark = i;
1505         }
1506         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1507         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1508       }
1509     } else {
1510       if (idx) *idx = 0;
1511       if (v)   *v   = 0;
1512     }
1513   }
1514   *nz = nztot;
1515   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1516   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 
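/*
   A minimal usage sketch (illustrative only, not part of this file's implementation):
   typical use of the row access implemented above. Only locally owned rows may be
   requested, and each MatGetRow() must be paired with MatRestoreRow() before the next
   call. ExampleRowScan is a hypothetical helper name.
*/
#if 0
static PetscErrorCode ExampleRowScan(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* inspect the ncols global column indices and values of this row here */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
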
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1522 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1523 {
1524   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1525 
1526   PetscFunctionBegin;
1527   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1528   baij->getrowactive = PETSC_FALSE;
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1534 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1535 {
1536   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1541   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1547 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1548 {
1549   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1550   Mat            A = a->A,B = a->B;
1551   PetscErrorCode ierr;
1552   PetscReal      isend[5],irecv[5];
1553 
1554   PetscFunctionBegin;
1555   info->block_size     = (PetscReal)matin->rmap->bs;
1556   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1557   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1558   isend[3] = info->memory;  isend[4] = info->mallocs;
1559   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1560   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1561   isend[3] += info->memory;  isend[4] += info->mallocs;
1562   if (flag == MAT_LOCAL) {
1563     info->nz_used      = isend[0];
1564     info->nz_allocated = isend[1];
1565     info->nz_unneeded  = isend[2];
1566     info->memory       = isend[3];
1567     info->mallocs      = isend[4];
1568   } else if (flag == MAT_GLOBAL_MAX) {
1569     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1570     info->nz_used      = irecv[0];
1571     info->nz_allocated = irecv[1];
1572     info->nz_unneeded  = irecv[2];
1573     info->memory       = irecv[3];
1574     info->mallocs      = irecv[4];
1575   } else if (flag == MAT_GLOBAL_SUM) {
1576     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1577     info->nz_used      = irecv[0];
1578     info->nz_allocated = irecv[1];
1579     info->nz_unneeded  = irecv[2];
1580     info->memory       = irecv[3];
1581     info->mallocs      = irecv[4];
1582   } else {
1583     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1584   }
1585   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1586   info->fill_ratio_needed = 0;
1587   info->factor_mallocs    = 0;
1588   PetscFunctionReturn(0);
1589 }
1590 
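/*
   A minimal usage sketch (illustrative only, not part of this file's implementation):
   querying assembly statistics through the routine above; MAT_GLOBAL_SUM reduces the
   local counts over the matrix communicator. ExampleReportNonzeros is a hypothetical
   helper name.
*/
#if 0
static PetscErrorCode ExampleReportNonzeros(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(((PetscObject)A)->comm,"nonzeros used %g allocated %g\n",
                     (double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
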
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1593 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1594 {
1595   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1596   PetscErrorCode ierr;
1597 
1598   PetscFunctionBegin;
1599   switch (op) {
1600   case MAT_NEW_NONZERO_LOCATIONS:
1601   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1602   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1603   case MAT_KEEP_NONZERO_PATTERN:
1604   case MAT_NEW_NONZERO_LOCATION_ERR:
1605     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1606     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1607     break;
1608   case MAT_ROW_ORIENTED:
1609     a->roworiented = flg;
1610     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1611     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1612     break;
1613   case MAT_NEW_DIAGONALS:
1614     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1615     break;
1616   case MAT_IGNORE_OFF_PROC_ENTRIES:
1617     a->donotstash = flg;
1618     break;
1619   case MAT_USE_HASH_TABLE:
1620     a->ht_flag = flg;
1621     break;
1622   case MAT_SYMMETRIC:
1623   case MAT_STRUCTURALLY_SYMMETRIC:
1624   case MAT_HERMITIAN:
1625   case MAT_SYMMETRY_ETERNAL:
1626     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1627     break;
1628   default:
1629     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 
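/*
   A minimal usage sketch (illustrative only, not part of this file's implementation):
   the options handled above are set by the caller before assembly, for example to
   enable the hash-table insertion path or to declare that no off-process entries will
   be generated. A and ierr are assumed to be declared by the caller.
*/
#if 0
  ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
#endif
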
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1636 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1637 {
1638   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1639   Mat_SeqBAIJ    *Aloc;
1640   Mat            B;
1641   PetscErrorCode ierr;
1642   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1643   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1644   MatScalar      *a;
1645 
1646   PetscFunctionBegin;
1647   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1648   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1649     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1650     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1651     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1652     /* Do not know preallocation information, but must set block size */
1653     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1654   } else {
1655     B = *matout;
1656   }
1657 
1658   /* copy over the A part */
1659   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1660   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1661   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1662 
1663   for (i=0; i<mbs; i++) {
1664     rvals[0] = bs*(baij->rstartbs + i);
1665     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1666     for (j=ai[i]; j<ai[i+1]; j++) {
1667       col = (baij->cstartbs+aj[j])*bs;
1668       for (k=0; k<bs; k++) {
1669         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1670         col++; a += bs;
1671       }
1672     }
1673   }
1674   /* copy over the B part */
1675   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1676   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1677   for (i=0; i<mbs; i++) {
1678     rvals[0] = bs*(baij->rstartbs + i);
1679     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1680     for (j=ai[i]; j<ai[i+1]; j++) {
1681       col = baij->garray[aj[j]]*bs;
1682       for (k=0; k<bs; k++) {
1683         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1684         col++; a += bs;
1685       }
1686     }
1687   }
1688   ierr = PetscFree(rvals);CHKERRQ(ierr);
1689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691 
1692   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1693     *matout = B;
1694   } else {
1695     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1696   }
1697   PetscFunctionReturn(0);
1698 }
1699 
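/*
   A minimal usage sketch (illustrative only, not part of this file's implementation):
   forming an explicit transpose with the routine above, either as a new matrix or in
   place for a square matrix. A and ierr are assumed to be declared by the caller.
*/
#if 0
  Mat At;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* new matrix At = A^T */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in place; square A only */
#endif
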
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1702 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1703 {
1704   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1705   Mat            a = baij->A,b = baij->B;
1706   PetscErrorCode ierr;
1707   PetscInt       s1,s2,s3;
1708 
1709   PetscFunctionBegin;
1710   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1711   if (rr) {
1712     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1713     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1714     /* Overlap communication with computation. */
1715     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1716   }
1717   if (ll) {
1718     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1719     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1720     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1721   }
1722   /* scale  the diagonal block */
1723   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1724 
1725   if (rr) {
1726     /* Do a scatter end and then right scale the off-diagonal block */
1727     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1729   }
1730 
1731   PetscFunctionReturn(0);
1732 }
1733 
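/*
   A minimal usage sketch (illustrative only, not part of this file's implementation):
   row/column scaling A <- diag(l) A diag(r) with the routine above; l must conform to
   the local row layout and r to the local column layout. A, l, r and ierr are assumed
   to be declared by the caller.
*/
#if 0
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);           /* scale rows by l and columns by r */
  ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr);  /* column scaling only */
#endif
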
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1736 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1737 {
1738   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1739   PetscErrorCode    ierr;
1740   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1741   PetscInt          i,*owners = A->rmap->range;
1742   PetscInt          *nprocs,j,idx,nsends,row;
1743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1744   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1745   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1746   MPI_Comm          comm = ((PetscObject)A)->comm;
1747   MPI_Request       *send_waits,*recv_waits;
1748   MPI_Status        recv_status,*send_status;
1749   const PetscScalar *xx;
1750   PetscScalar       *bb;
1751 #if defined(PETSC_DEBUG)
1752   PetscBool         found = PETSC_FALSE;
1753 #endif
1754 
1755   PetscFunctionBegin;
1756   /*  first count number of contributors to each processor */
1757   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1758   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1759   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1760   j = 0;
1761   for (i=0; i<N; i++) {
1762     if (lastidx > (idx = rows[i])) j = 0;
1763     lastidx = idx;
1764     for (; j<size; j++) {
1765       if (idx >= owners[j] && idx < owners[j+1]) {
1766         nprocs[2*j]++;
1767         nprocs[2*j+1] = 1;
1768         owner[i] = j;
1769 #if defined(PETSC_DEBUG)
1770         found = PETSC_TRUE;
1771 #endif
1772         break;
1773       }
1774     }
1775 #if defined(PETSC_DEBUG)
1776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1777     found = PETSC_FALSE;
1778 #endif
1779   }
1780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1781 
1782   if (A->nooffproczerorows) {
1783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1784     nrecvs = nsends;
1785     nmax   = N;
1786   } else {
1787     /* inform other processors of number of messages and max length*/
1788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1789   }
1790 
1791   /* post receives:   */
1792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1794   for (i=0; i<nrecvs; i++) {
1795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1796   }
1797 
1798   /* do sends:
1799      1) starts[i] gives the starting index in svalues for stuff going to
1800      the ith processor
1801   */
1802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1805   starts[0]  = 0;
1806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1807   for (i=0; i<N; i++) {
1808     svalues[starts[owner[i]]++] = rows[i];
1809   }
1810 
1811   starts[0] = 0;
1812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1813   count = 0;
1814   for (i=0; i<size; i++) {
1815     if (nprocs[2*i+1]) {
1816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1817     }
1818   }
1819   ierr = PetscFree(starts);CHKERRQ(ierr);
1820 
1821   base = owners[rank];
1822 
1823   /*  wait on receives */
1824   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1825   count  = nrecvs;
1826   slen = 0;
1827   while (count) {
1828     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1829     /* unpack receives into our local space */
1830     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1831     source[imdex]  = recv_status.MPI_SOURCE;
1832     lens[imdex]    = n;
1833     slen          += n;
1834     count--;
1835   }
1836   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1837 
1838   /* move the data into the send scatter */
1839   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1840   count = 0;
1841   for (i=0; i<nrecvs; i++) {
1842     values = rvalues + i*nmax;
1843     for (j=0; j<lens[i]; j++) {
1844       lrows[count++] = values[j] - base;
1845     }
1846   }
1847   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1848   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1849   ierr = PetscFree(owner);CHKERRQ(ierr);
1850   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1851 
1852   /* fix right hand side if needed */
1853   if (x && b) {
1854     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1855     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1856     for (i=0; i<slen; i++) {
1857       bb[lrows[i]] = diag*xx[lrows[i]];
1858     }
1859     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1860     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1861   }
1862 
1863   /* actually zap the local rows */
1864   /*
1865         Zero the required rows. If the "diagonal block" of the matrix
1866      is square and the user wishes to set the diagonal we use separate
1867      code so that MatSetValues() is not called for each diagonal allocating
1868      new memory, thus calling lots of mallocs and slowing things down.
1869 
1870   */
1871   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1872   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1873   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1874     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1875   } else if (diag != 0.0) {
1876     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1877     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1878        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1879     for (i=0; i<slen; i++) {
1880       row  = lrows[i] + rstart_bs;
1881       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1882     }
1883     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1884     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1885   } else {
1886     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1887   }
1888 
1889   ierr = PetscFree(lrows);CHKERRQ(ierr);
1890 
1891   /* wait on sends */
1892   if (nsends) {
1893     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1894     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1895     ierr = PetscFree(send_status);CHKERRQ(ierr);
1896   }
1897   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1898   ierr = PetscFree(svalues);CHKERRQ(ierr);
1899 
1900   PetscFunctionReturn(0);
1901 }
1902 
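/*
   A minimal usage sketch (illustrative only, not part of this file's implementation):
   imposing Dirichlet conditions with the routine above. Rows may be listed by any
   process; they are shipped to their owners, the rows are zeroed with diag placed on
   the diagonal, and when x and b are supplied the right hand side is fixed to
   b_i = diag*x_i. A, nrows, rows, x, b and ierr are assumed to be declared by the caller.
*/
#if 0
  ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
#endif
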
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1905 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1906 {
1907   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatEqual_MPIBAIJ"
1919 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1920 {
1921   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1922   Mat            a,b,c,d;
1923   PetscBool      flg;
1924   PetscErrorCode ierr;
1925 
1926   PetscFunctionBegin;
1927   a = matA->A; b = matA->B;
1928   c = matB->A; d = matB->B;
1929 
1930   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1931   if (flg) {
1932     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1933   }
1934   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 #undef __FUNCT__
1939 #define __FUNCT__ "MatCopy_MPIBAIJ"
1940 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1941 {
1942   PetscErrorCode ierr;
1943   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1944   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1945 
1946   PetscFunctionBegin;
1947   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1948   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1949     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1950   } else {
1951     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1952     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1953   }
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1959 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1960 {
1961   PetscErrorCode ierr;
1962 
1963   PetscFunctionBegin;
1964   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1970 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1971 {
1972   PetscErrorCode ierr;
1973   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1974   PetscBLASInt   bnz,one=1;
1975   Mat_SeqBAIJ    *x,*y;
1976 
1977   PetscFunctionBegin;
1978   if (str == SAME_NONZERO_PATTERN) {
1979     PetscScalar alpha = a;
1980     x = (Mat_SeqBAIJ *)xx->A->data;
1981     y = (Mat_SeqBAIJ *)yy->A->data;
1982     bnz = PetscBLASIntCast(x->nz);
1983     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1984     x = (Mat_SeqBAIJ *)xx->B->data;
1985     y = (Mat_SeqBAIJ *)yy->B->data;
1986     bnz = PetscBLASIntCast(x->nz);
1987     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1988   } else {
1989     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1990   }
1991   PetscFunctionReturn(0);
1992 }
1993 
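/*
   A minimal usage sketch (illustrative only, not part of this file's implementation):
   Y <- a*X + Y through the routine above; with SAME_NONZERO_PATTERN the update reduces
   to two BLAS axpy calls on the stored values of the diagonal and off-diagonal blocks.
   X, Y and ierr are assumed to be declared by the caller.
*/
#if 0
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
#endif
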
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1996 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1997 {
1998   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1999   PetscErrorCode ierr;
2000 
2001   PetscFunctionBegin;
2002   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2003   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2009 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2010 {
2011   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2012   PetscErrorCode ierr;
2013 
2014   PetscFunctionBegin;
2015   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2016   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2022 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2023 {
2024   PetscErrorCode ierr;
2025   IS             iscol_local;
2026   PetscInt       csize;
2027 
2028   PetscFunctionBegin;
2029   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2030   if (call == MAT_REUSE_MATRIX) {
2031     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2032     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2033   } else {
2034     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2035   }
2036   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2037   if (call == MAT_INITIAL_MATRIX) {
2038     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2039     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2040   }
2041   PetscFunctionReturn(0);
2042 }
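/*
   A minimal usage sketch (illustrative only, not part of this file's implementation):
   extracting a parallel submatrix with the routine above. The first call must use
   MAT_INITIAL_MATRIX; later calls with the same index sets may pass MAT_REUSE_MATRIX
   to refill the same matrix. A, isrow, iscol and ierr are assumed to be declared by
   the caller, with block-aligned indices in the index sets.
*/
#if 0
  Mat sub;
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
  /* ... change the values of A ... */
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
  ierr = MatDestroy(&sub);CHKERRQ(ierr);
#endif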
2043 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2046 /*
2047   Not great since it makes two copies of the submatrix: first a SeqBAIJ on each
2048   process, then the final result formed by concatenating the local matrices.
2049   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2050 */
2051 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2052 {
2053   PetscErrorCode ierr;
2054   PetscMPIInt    rank,size;
2055   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2056   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2057   Mat            M,Mreuse;
2058   MatScalar      *vwork,*aa;
2059   MPI_Comm       comm = ((PetscObject)mat)->comm;
2060   IS             isrow_new, iscol_new;
2061   PetscBool      idflag,allrows, allcols;
2062   Mat_SeqBAIJ    *aij;
2063 
2064 
2065   PetscFunctionBegin;
2066   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2067   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2068   /* The compression and expansion should be avoided. It does not report
2069      errors and might change the indices, hence it is buggy */
2070   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2071   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2072 
2073   /* Check for special case: each processor gets entire matrix columns */
2074   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2075   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2076   if (idflag && ncol == mat->cmap->N){
2077     allcols = PETSC_TRUE;
2078   } else {
2079     allcols = PETSC_FALSE;
2080   }
2081 
2082   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2083   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2084   if (idflag && nrow == mat->rmap->N){
2085     allrows = PETSC_TRUE;
2086   } else {
2087     allrows = PETSC_FALSE;
2088   }
2089   if (call ==  MAT_REUSE_MATRIX) {
2090     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2091     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2092     ierr  = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2093   } else {
2094     ierr   = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2095   }
2096   ierr = ISDestroy(&isrow_new); CHKERRQ(ierr);
2097   ierr = ISDestroy(&iscol_new); CHKERRQ(ierr);
2098   /*
2099       m - number of local rows
2100       n - number of columns (same on all processors)
2101       rstart - first row in new global matrix generated
2102   */
2103   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2104   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2105   m    = m/bs;
2106   n    = n/bs;
2107 
2108   if (call == MAT_INITIAL_MATRIX) {
2109     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2110     ii  = aij->i;
2111     jj  = aij->j;
2112 
2113     /*
2114         Determine the number of non-zeros in the diagonal and off-diagonal
2115         portions of the matrix in order to do correct preallocation
2116     */
2117 
2118     /* first get start and end of "diagonal" columns */
2119     if (csize == PETSC_DECIDE) {
2120       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2121       if (mglobal == n*bs) { /* square matrix */
2122         nlocal = m;
2123       } else {
2124         nlocal = n/size + ((n % size) > rank);
2125       }
2126     } else {
2127       nlocal = csize/bs;
2128     }
2129     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2130     rstart = rend - nlocal;
2131     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2132 
2133     /* next, compute all the lengths */
2134     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2135     olens = dlens + m;
2136     for (i=0; i<m; i++) {
2137       jend = ii[i+1] - ii[i];
2138       olen = 0;
2139       dlen = 0;
2140       for (j=0; j<jend; j++) {
2141         if (*jj < rstart || *jj >= rend) olen++;
2142         else dlen++;
2143         jj++;
2144       }
2145       olens[i] = olen;
2146       dlens[i] = dlen;
2147     }
2148     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2149     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2150     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2151     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2152     ierr = PetscFree(dlens);CHKERRQ(ierr);
2153   } else {
2154     PetscInt ml,nl;
2155 
2156     M = *newmat;
2157     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2158     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2159     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2160     /*
2161          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2162        rather than the slower MatSetValues().
2163     */
2164     M->was_assembled = PETSC_TRUE;
2165     M->assembled     = PETSC_FALSE;
2166   }
2167   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2168   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2169   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2170   ii  = aij->i;
2171   jj  = aij->j;
2172   aa  = aij->a;
2173   for (i=0; i<m; i++) {
2174     row   = rstart/bs + i;
2175     nz    = ii[i+1] - ii[i];
2176     cwork = jj;     jj += nz;
2177     vwork = aa;     aa += nz;
2178     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2179   }
2180 
2181   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2182   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183   *newmat = M;
2184 
2185   /* save submatrix used in processor for next request */
2186   if (call ==  MAT_INITIAL_MATRIX) {
2187     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2188     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2189   }
2190 
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatPermute_MPIBAIJ"
2196 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2197 {
2198   MPI_Comm       comm,pcomm;
2199   PetscInt       first,local_size,nrows;
2200   const PetscInt *rows;
2201   PetscMPIInt    size;
2202   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2203   PetscErrorCode ierr;
2204 
2205   PetscFunctionBegin;
2206   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2207   /* make a collective version of 'rowp' */
2208   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2209   if (pcomm==comm) {
2210     crowp = rowp;
2211   } else {
2212     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2213     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2214     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2215     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2216   }
2217   /* collect the global row permutation and invert it */
2218   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2219   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2220   if (pcomm!=comm) {
2221     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2222   }
2223   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2224   /* get the local target indices */
2225   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2226   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2227   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2228   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2229   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2230   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2231   /* the column permutation is so much easier;
2232      make a local version of 'colp' and invert it */
2233   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2234   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2235   if (size==1) {
2236     lcolp = colp;
2237   } else {
2238     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2239     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2240     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2241   }
2242   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2243   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2244   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2245   if (size>1) {
2246     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2247     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2248   }
2249   /* now we just get the submatrix */
2250   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2251   /* clean up */
2252   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2253   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2259 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2260 {
2261   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2262   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2263 
2264   PetscFunctionBegin;
2265   if (nghosts) { *nghosts = B->nbs;}
2266   if (ghosts) {*ghosts = baij->garray;}
2267   PetscFunctionReturn(0);
2268 }
2269 
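/*
   A minimal usage sketch (illustrative only, not part of this file's implementation):
   retrieving the global indices of the ghost blocks (the off-process block columns
   referenced by this process) via the routine above. A and ierr are assumed to be
   declared by the caller.
*/
#if 0
  PetscInt       nghosts;
  const PetscInt *ghosts;
  ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr); /* ghosts[] holds global block column indices */
#endif
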
2270 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2274 /*
2275     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2276 */
2277 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2278 {
2279   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2280   PetscErrorCode        ierr;
2281   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2282   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2283   const PetscInt        *is;
2284   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2285   PetscInt              *rowhit,M,cstart,cend,colb;
2286   PetscInt              *columnsforrow,l;
2287   IS                    *isa;
2288   PetscBool              done,flg;
2289   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2290   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL),ctype=c->ctype;
2291 
2292   PetscFunctionBegin;
2293   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2294   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2295 
2296   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2297   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2298   M                = mat->rmap->n/bs;
2299   cstart           = mat->cmap->rstart/bs;
2300   cend             = mat->cmap->rend/bs;
2301   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2302   c->N             = mat->cmap->N/bs;
2303   c->m             = mat->rmap->n/bs;
2304   c->rstart        = mat->rmap->rstart/bs;
2305 
2306   c->ncolors       = nis;
2307   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2308   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2309   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2310   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2311   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2312   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2313 
2314   /* Allow access to data structures of local part of matrix */
2315   if (!baij->colmap) {
2316     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2317   }
2318   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2319   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2320 
2321   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2322   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2323 
2324   for (i=0; i<nis; i++) {
2325     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2326     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2327     c->ncolumns[i] = n;
2328     if (n) {
2329       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2330       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2331       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2332     } else {
2333       c->columns[i]  = 0;
2334     }
2335 
2336     if (ctype == IS_COLORING_GLOBAL){
2337       /* Determine the total (parallel) number of columns of this color */
2338       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2339       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2340 
2341       nn   = PetscMPIIntCast(n);
2342       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2343       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2344       if (!nctot) {
2345         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2346       }
2347 
2348       disp[0] = 0;
2349       for (j=1; j<size; j++) {
2350         disp[j] = disp[j-1] + ncolsonproc[j-1];
2351       }
2352 
2353       /* Get complete list of columns for color on each processor */
2354       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2355       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2356       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2357     } else if (ctype == IS_COLORING_GHOSTED){
2358       /* Determine local number of columns of this color on this process, including ghost points */
2359       nctot = n;
2360       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2361       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2362     } else {
2363       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2364     }
2365 
2366     /*
2367        Mark all rows affected by these columns
2368     */
2369     /* Temporary option to allow for debugging/testing */
2370     flg  = PETSC_FALSE;
2371     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2372     if (!flg) {/*-----------------------------------------------------------------------------*/
2373       /* crude, fast version */
2374       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2375       /* loop over columns*/
2376       for (j=0; j<nctot; j++) {
2377         if (ctype == IS_COLORING_GHOSTED) {
2378           col = ltog[cols[j]];
2379         } else {
2380           col  = cols[j];
2381         }
2382         if (col >= cstart && col < cend) {
2383           /* column is in diagonal block of matrix */
2384           rows = A_cj + A_ci[col-cstart];
2385           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2386         } else {
2387 #if defined (PETSC_USE_CTABLE)
2388           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2389           colb --;
2390 #else
2391           colb = baij->colmap[col] - 1;
2392 #endif
2393           if (colb == -1) {
2394             m = 0;
2395           } else {
2396             colb = colb/bs;
2397             rows = B_cj + B_ci[colb];
2398             m    = B_ci[colb+1] - B_ci[colb];
2399           }
2400         }
2401         /* loop over columns marking them in rowhit */
2402         for (k=0; k<m; k++) {
2403           rowhit[*rows++] = col + 1;
2404         }
2405       }
2406 
2407       /* count the number of hits */
2408       nrows = 0;
2409       for (j=0; j<M; j++) {
2410         if (rowhit[j]) nrows++;
2411       }
2412       c->nrows[i]         = nrows;
2413       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2414       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2415       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2416       nrows = 0;
2417       for (j=0; j<M; j++) {
2418         if (rowhit[j]) {
2419           c->rows[i][nrows]           = j;
2420           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2421           nrows++;
2422         }
2423       }
2424     } else {/*-------------------------------------------------------------------------------*/
2425       /* slow version, using rowhit as a linked list */
2426       PetscInt currentcol,fm,mfm;
2427       rowhit[M] = M;
2428       nrows     = 0;
2429       /* loop over columns*/
2430       for (j=0; j<nctot; j++) {
2431         if (ctype == IS_COLORING_GHOSTED) {
2432           col = ltog[cols[j]];
2433         } else {
2434           col  = cols[j];
2435         }
2436         if (col >= cstart && col < cend) {
2437           /* column is in diagonal block of matrix */
2438           rows = A_cj + A_ci[col-cstart];
2439           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2440         } else {
2441 #if defined (PETSC_USE_CTABLE)
2442           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2443           colb --;
2444 #else
2445           colb = baij->colmap[col] - 1;
2446 #endif
2447           if (colb == -1) {
2448             m = 0;
2449           } else {
2450             colb = colb/bs;
2451             rows = B_cj + B_ci[colb];
2452             m    = B_ci[colb+1] - B_ci[colb];
2453           }
2454         }
2455 
2456         /* loop over columns marking them in rowhit */
2457         fm    = M; /* fm points to first entry in linked list */
2458         for (k=0; k<m; k++) {
2459           currentcol = *rows++;
2460 	  /* is it already in the list? */
2461           /* is it already in the list? */
2462             mfm  = fm;
2463             fm   = rowhit[fm];
2464           } while (fm < currentcol);
2465           /* not in list so add it */
2466           if (fm != currentcol) {
2467             nrows++;
2468             columnsforrow[currentcol] = col;
2469             /* next three lines insert new entry into linked list */
2470             rowhit[mfm]               = currentcol;
2471             rowhit[currentcol]        = fm;
2472             fm                        = currentcol;
2473             /* fm points to present position in list since we know the columns are sorted */
2474           } else {
2475             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2476           }
2477         }
2478       }
2479       c->nrows[i]         = nrows;
2480       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2481       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2482       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2483       /* now store the linked list of rows into c->rows[i] */
2484       nrows = 0;
2485       fm    = rowhit[M];
2486       do {
2487         c->rows[i][nrows]            = fm;
2488         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2489         fm                           = rowhit[fm];
2490       } while (fm < M);
2491     } /* ---------------------------------------------------------------------------------------*/
2492     ierr = PetscFree(cols);CHKERRQ(ierr);
2493   }
2494 
2495   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2496   /*
2497        vscale will contain the "diagonal" on processor scalings followed by the off processor
2498   */
2499   if (ctype == IS_COLORING_GLOBAL) {
2500     PetscInt *garray;
2501     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2502     for (i=0; i<baij->B->cmap->n/bs; i++) {
2503       for (j=0; j<bs; j++) {
2504         garray[i*bs+j] = bs*baij->garray[i]+j;
2505       }
2506     }
2507     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2508     ierr = PetscFree(garray);CHKERRQ(ierr);
2509     CHKMEMQ;
2510     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2511     for (k=0; k<c->ncolors; k++) {
2512       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2513       for (l=0; l<c->nrows[k]; l++) {
2514         col = c->columnsforrow[k][l];
2515         if (col >= cstart && col < cend) {
2516           /* column is in diagonal block of matrix */
2517           colb = col - cstart;
2518         } else {
2519           /* column  is in "off-processor" part */
2520 #if defined (PETSC_USE_CTABLE)
2521           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2522           colb --;
2523 #else
2524           colb = baij->colmap[col] - 1;
2525 #endif
2526           colb = colb/bs;
2527           colb += cend - cstart;
2528         }
2529         c->vscaleforrow[k][l] = colb;
2530       }
2531     }
2532   } else if (ctype == IS_COLORING_GHOSTED) {
2533     /* Get gtol mapping */
2534     PetscInt N = mat->cmap->N, *gtol;
2535     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2536     for (i=0; i<N; i++) gtol[i] = -1;
2537     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2538 
2539     c->vscale = 0; /* will be created in MatFDColoringApply() */
2540     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2541     for (k=0; k<c->ncolors; k++) {
2542       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2543       for (l=0; l<c->nrows[k]; l++) {
2544         col = c->columnsforrow[k][l];      /* global column index */
2545         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2546       }
2547     }
2548     ierr = PetscFree(gtol);CHKERRQ(ierr);
2549   }
2550   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2551 
2552   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2553   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2554   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2555   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2556   CHKMEMQ;
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2562 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2563 {
2564   Mat            B;
2565   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2566   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2567   Mat_SeqAIJ     *b;
2568   PetscErrorCode ierr;
2569   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2570   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2571   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2572 
2573   PetscFunctionBegin;
2574   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2575   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2576 
2577   /* ----------------------------------------------------------------
2578      Tell every processor the number of nonzeros per row
2579   */
2580   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2581   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2582     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2583   }
2584   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2585   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2586   displs     = recvcounts + size;
2587   for (i=0; i<size; i++) {
2588     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2589     displs[i]     = A->rmap->range[i]/bs;
2590   }
2591 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2592   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2593 #else
2594   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2595 #endif
2596   /* ---------------------------------------------------------------
2597      Create the sequential matrix of the same type as the local block diagonal
2598   */
2599   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2600   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2601   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2602   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2603   b = (Mat_SeqAIJ *)B->data;
2604 
2605   /*--------------------------------------------------------------------
2606     Copy my part of matrix column indices over
2607   */
2608   sendcount  = ad->nz + bd->nz;
2609   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2610   a_jsendbuf = ad->j;
2611   b_jsendbuf = bd->j;
2612   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2613   cnt        = 0;
2614   for (i=0; i<n; i++) {
2615 
2616     /* put in lower diagonal portion */
2617     m = bd->i[i+1] - bd->i[i];
2618     while (m > 0) {
2619       /* is it above diagonal (in bd (compressed) numbering) */
2620       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2621       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2622       m--;
2623     }
2624 
2625     /* put in diagonal portion */
2626     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2627       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2628     }
2629 
2630     /* put in upper diagonal portion */
2631     while (m-- > 0) {
2632       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2633     }
2634   }
2635   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2636 
2637   /*--------------------------------------------------------------------
2638     Gather all column indices to all processors
2639   */
2640   for (i=0; i<size; i++) {
2641     recvcounts[i] = 0;
2642     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2643       recvcounts[i] += lens[j];
2644     }
2645   }
2646   displs[0]  = 0;
2647   for (i=1; i<size; i++) {
2648     displs[i] = displs[i-1] + recvcounts[i-1];
2649   }
2650 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2651   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2652 #else
2653   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2654 #endif
2655   /*--------------------------------------------------------------------
2656     Assemble the matrix into usable form (note the numerical values are not yet set)
2657   */
2658   /* set the b->ilen (length of each row) values */
2659   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2660   /* set the b->i indices */
2661   b->i[0] = 0;
2662   for (i=1; i<=A->rmap->N/bs; i++) {
2663     b->i[i] = b->i[i-1] + lens[i-1];
2664   }
2665   ierr = PetscFree(lens);CHKERRQ(ierr);
2666   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2667   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2668   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2669 
2670   if (A->symmetric){
2671     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2672   } else if (A->hermitian) {
2673     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2674   } else if (A->structurally_symmetric) {
2675     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2676   }
2677   *newmat = B;
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatSOR_MPIBAIJ"
2683 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2684 {
2685   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2686   PetscErrorCode ierr;
2687   Vec            bb1 = 0;
2688 
2689   PetscFunctionBegin;
2690   if (flag == SOR_APPLY_UPPER) {
2691     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2692     PetscFunctionReturn(0);
2693   }
2694 
2695   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2696     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2697   }
2698 
2699   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2700     if (flag & SOR_ZERO_INITIAL_GUESS) {
2701       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2702       its--;
2703     }
2704 
2705     while (its--) {
2706       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2707       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2708 
2709       /* update rhs: bb1 = bb - B*x */
2710       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2711       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2712 
2713       /* local sweep */
2714       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2715     }
2716   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2717     if (flag & SOR_ZERO_INITIAL_GUESS) {
2718       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2719       its--;
2720     }
2721     while (its--) {
2722       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2723       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2724 
2725       /* update rhs: bb1 = bb - B*x */
2726       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2727       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2728 
2729       /* local sweep */
2730       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2731     }
2732   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2733     if (flag & SOR_ZERO_INITIAL_GUESS) {
2734       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2735       its--;
2736     }
2737     while (its--) {
2738       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2739       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2740 
2741       /* update rhs: bb1 = bb - B*x */
2742       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2743       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2744 
2745       /* local sweep */
2746       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2747     }
2748   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2749 
2750   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2751   PetscFunctionReturn(0);
2752 }
2753 
2754 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2755 
2756 #undef __FUNCT__
2757 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2758 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2759 {
2760   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2761   PetscErrorCode ierr;
2762 
2763   PetscFunctionBegin;
2764   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 
2769 /* -------------------------------------------------------------------*/
2770 static struct _MatOps MatOps_Values = {
2771        MatSetValues_MPIBAIJ,
2772        MatGetRow_MPIBAIJ,
2773        MatRestoreRow_MPIBAIJ,
2774        MatMult_MPIBAIJ,
2775 /* 4*/ MatMultAdd_MPIBAIJ,
2776        MatMultTranspose_MPIBAIJ,
2777        MatMultTransposeAdd_MPIBAIJ,
2778        0,
2779        0,
2780        0,
2781 /*10*/ 0,
2782        0,
2783        0,
2784        MatSOR_MPIBAIJ,
2785        MatTranspose_MPIBAIJ,
2786 /*15*/ MatGetInfo_MPIBAIJ,
2787        MatEqual_MPIBAIJ,
2788        MatGetDiagonal_MPIBAIJ,
2789        MatDiagonalScale_MPIBAIJ,
2790        MatNorm_MPIBAIJ,
2791 /*20*/ MatAssemblyBegin_MPIBAIJ,
2792        MatAssemblyEnd_MPIBAIJ,
2793        MatSetOption_MPIBAIJ,
2794        MatZeroEntries_MPIBAIJ,
2795 /*24*/ MatZeroRows_MPIBAIJ,
2796        0,
2797        0,
2798        0,
2799        0,
2800 /*29*/ MatSetUp_MPIBAIJ,
2801        0,
2802        0,
2803        0,
2804        0,
2805 /*34*/ MatDuplicate_MPIBAIJ,
2806        0,
2807        0,
2808        0,
2809        0,
2810 /*39*/ MatAXPY_MPIBAIJ,
2811        MatGetSubMatrices_MPIBAIJ,
2812        MatIncreaseOverlap_MPIBAIJ,
2813        MatGetValues_MPIBAIJ,
2814        MatCopy_MPIBAIJ,
2815 /*44*/ 0,
2816        MatScale_MPIBAIJ,
2817        0,
2818        0,
2819        0,
2820 /*49*/ 0,
2821        0,
2822        0,
2823        0,
2824        0,
2825 /*54*/ MatFDColoringCreate_MPIBAIJ,
2826        0,
2827        MatSetUnfactored_MPIBAIJ,
2828        MatPermute_MPIBAIJ,
2829        MatSetValuesBlocked_MPIBAIJ,
2830 /*59*/ MatGetSubMatrix_MPIBAIJ,
2831        MatDestroy_MPIBAIJ,
2832        MatView_MPIBAIJ,
2833        0,
2834        0,
2835 /*64*/ 0,
2836        0,
2837        0,
2838        0,
2839        0,
2840 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2841        0,
2842        0,
2843        0,
2844        0,
2845 /*74*/ 0,
2846        MatFDColoringApply_BAIJ,
2847        0,
2848        0,
2849        0,
2850 /*79*/ 0,
2851        0,
2852        0,
2853        0,
2854        MatLoad_MPIBAIJ,
2855 /*84*/ 0,
2856        0,
2857        0,
2858        0,
2859        0,
2860 /*89*/ 0,
2861        0,
2862        0,
2863        0,
2864        0,
2865 /*94*/ 0,
2866        0,
2867        0,
2868        0,
2869        0,
2870 /*99*/ 0,
2871        0,
2872        0,
2873        0,
2874        0,
2875 /*104*/0,
2876        MatRealPart_MPIBAIJ,
2877        MatImaginaryPart_MPIBAIJ,
2878        0,
2879        0,
2880 /*109*/0,
2881        0,
2882        0,
2883        0,
2884        0,
2885 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2886        0,
2887        MatGetGhosts_MPIBAIJ,
2888        0,
2889        0,
2890 /*119*/0,
2891        0,
2892        0,
2893        0,
2894        0,
2895 /*124*/0,
2896        0,
2897        MatInvertBlockDiagonal_MPIBAIJ
2898 };
2899 
2900 EXTERN_C_BEGIN
2901 #undef __FUNCT__
2902 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2903 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2904 {
2905   PetscFunctionBegin;
2906   *a = ((Mat_MPIBAIJ *)A->data)->A;
2907   PetscFunctionReturn(0);
2908 }
2909 EXTERN_C_END
2910 
2911 EXTERN_C_BEGIN
2912 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2913 EXTERN_C_END
2914 
2915 EXTERN_C_BEGIN
2916 #undef __FUNCT__
2917 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2918 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2919 {
2920   PetscInt       m,rstart,cstart,cend;
2921   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2922   const PetscInt *JJ=0;
2923   PetscScalar    *values=0;
2924   PetscErrorCode ierr;
2925 
2926   PetscFunctionBegin;
2927   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2928   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2929   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2930   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2931   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2932   m      = B->rmap->n/bs;
2933   rstart = B->rmap->rstart/bs;
2934   cstart = B->cmap->rstart/bs;
2935   cend   = B->cmap->rend/bs;
2936 
2937   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2938   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2939   for (i=0; i<m; i++) {
2940     nz = ii[i+1] - ii[i];
2941     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2942     nz_max = PetscMax(nz_max,nz);
2943     JJ  = jj + ii[i];
2944     for (j=0; j<nz; j++) {
2945       if (*JJ >= cstart) break;
2946       JJ++;
2947     }
2948     d = 0;
2949     for (; j<nz; j++) {
2950       if (*JJ++ >= cend) break;
2951       d++;
2952     }
2953     d_nnz[i] = d;
2954     o_nnz[i] = nz - d;
2955   }
2956   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2957   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2958 
2959   values = (PetscScalar*)V;
2960   if (!values) {
2961     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2962     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2963   }
2964   for (i=0; i<m; i++) {
2965     PetscInt          row    = i + rstart;
2966     PetscInt          ncols  = ii[i+1] - ii[i];
2967     const PetscInt    *icols = jj + ii[i];
2968     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2969     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2970   }
2971 
2972   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2973   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2974   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2975   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2976   PetscFunctionReturn(0);
2977 }
2978 EXTERN_C_END
2979 
2980 #undef __FUNCT__
2981 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2982 /*@C
2983    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2984    (block compressed row format).
2985 
2986    Collective on MPI_Comm
2987 
2988    Input Parameters:
2989 +  B - the matrix
2990 .  bs - the block size
2991 .  i - the indices into j for the start of each local row (starts with zero)
2992 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2993 -  v - optional values in the matrix
2994 
2995    Level: developer
2996 
2997 .keywords: matrix, aij, compressed row, sparse, parallel
2998 
2999 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3000 @*/
3001 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3002 {
3003   PetscErrorCode ierr;
3004 
3005   PetscFunctionBegin;
3006   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3007   PetscValidType(B,1);
3008   PetscValidLogicalCollectiveInt(B,bs,2);
3009   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
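
/*
   A minimal usage sketch for MatMPIBAIJSetPreallocationCSR() -- illustrative only, not part of
   the library source; the block-row CSR arrays below are made up, and comm/ierr are assumed to
   be declared by the caller.  With bs = 2 each process contributes 2 block rows (4 scalar rows):

      Mat      A;
      PetscInt bs    = 2;
      PetscInt ii[3] = {0,2,3};     first block row has 2 block entries, the second has 1
      PetscInt jj[3] = {0,1,1};     global block column indices, sorted within each row

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr);

   Passing PETSC_NULL for the values inserts zeros; the numerical entries can then be filled in
   with MatSetValuesBlocked() followed by another MatAssemblyBegin()/MatAssemblyEnd().
*/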
3012 
3013 EXTERN_C_BEGIN
3014 #undef __FUNCT__
3015 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3016 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3017 {
3018   Mat_MPIBAIJ    *b;
3019   PetscErrorCode ierr;
3020   PetscInt       i;
3021   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3022 
3023   PetscFunctionBegin;
3024   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3025   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3026 
3027   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3028   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3029   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3030   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3031 
3032   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3033   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3034   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3035   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3036   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3037 
3038   if (d_nnz) {
3039     for (i=0; i<B->rmap->n/bs; i++) {
3040       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3041     }
3042   }
3043   if (o_nnz) {
3044     for (i=0; i<B->rmap->n/bs; i++) {
3045       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3046     }
3047   }
3048 
3049   b = (Mat_MPIBAIJ*)B->data;
3050   b->bs2 = bs*bs;
3051   b->mbs = B->rmap->n/bs;
3052   b->nbs = B->cmap->n/bs;
3053   b->Mbs = B->rmap->N/bs;
3054   b->Nbs = B->cmap->N/bs;
3055 
3056   for (i=0; i<=b->size; i++) {
3057     b->rangebs[i] = B->rmap->range[i]/bs;
3058   }
3059   b->rstartbs = B->rmap->rstart/bs;
3060   b->rendbs   = B->rmap->rend/bs;
3061   b->cstartbs = B->cmap->rstart/bs;
3062   b->cendbs   = B->cmap->rend/bs;
3063 
3064   if (!B->preallocated) {
3065     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3066     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3067     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3068     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3069     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3070     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3071     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3072     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3073     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3074   }
3075 
3076   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3077   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3078   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3079   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3080   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3081   B->preallocated = PETSC_TRUE;
3082   PetscFunctionReturn(0);
3083 }
3084 EXTERN_C_END
3085 
3086 EXTERN_C_BEGIN
3087 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3088 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3089 EXTERN_C_END
3090 
3091 
3092 EXTERN_C_BEGIN
3093 #undef __FUNCT__
3094 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3095 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3096 {
3097   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3098   PetscErrorCode ierr;
3099   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3100   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3101   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3102 
3103   PetscFunctionBegin;
3104   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3105   ii[0] = 0;
3106   CHKMEMQ;
3107   for (i=0; i<M; i++) {
3108     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3109     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3110     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3111     /* remove one from the count if this row contains a diagonal entry */
3112     for (j=id[i]; j<id[i+1]; j++) {
3113       if (jd[j] == i) {ii[i+1]--;break;}
3114     }
3115   CHKMEMQ;
3116   }
3117   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3118   cnt = 0;
3119   for (i=0; i<M; i++) {
3120     for (j=io[i]; j<io[i+1]; j++) {
3121       if (garray[jo[j]] > rstart) break;
3122       jj[cnt++] = garray[jo[j]];
3123   CHKMEMQ;
3124     }
3125     for (k=id[i]; k<id[i+1]; k++) {
3126       if (jd[k] != i) {
3127         jj[cnt++] = rstart + jd[k];
3128   CHKMEMQ;
3129       }
3130     }
3131     for (;j<io[i+1]; j++) {
3132       jj[cnt++] = garray[jo[j]];
3133   CHKMEMQ;
3134     }
3135   }
3136   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3137   PetscFunctionReturn(0);
3138 }
3139 EXTERN_C_END
3140 
3141 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3142 EXTERN_C_BEGIN
3143 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3144 EXTERN_C_END
3145 
3146 EXTERN_C_BEGIN
3147 #undef __FUNCT__
3148 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3149 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3150 {
3151   PetscErrorCode ierr;
3152   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3153   Mat            B;
3154   Mat_MPIAIJ     *b;
3155 
3156   PetscFunctionBegin;
3157   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3158 
3159   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3160   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3161   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3162   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3163   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3164   b = (Mat_MPIAIJ*) B->data;
3165 
3166   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3167   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3168   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3169   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3170   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3171   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3172   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3173   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3174   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3175   if (reuse == MAT_REUSE_MATRIX) {
3176     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3177   } else {
3178    *newmat = B;
3179   }
3180   PetscFunctionReturn(0);
3181 }
3182 EXTERN_C_END
3183 
3184 EXTERN_C_BEGIN
3185 #if defined(PETSC_HAVE_MUMPS)
3186 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3187 #endif
3188 EXTERN_C_END
3189 
3190 /*MC
3191    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3192 
3193    Options Database Keys:
3194 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3195 . -mat_block_size <bs> - set the blocksize used to store the matrix
3196 - -mat_use_hash_table <fact> - use a hash table (sized with the factor <fact>) to save memory when constructing the matrix
3197 
3198   Level: beginner
3199 
3200 .seealso: MatCreateMPIBAIJ
3201 M*/
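
/*
   Illustrative sketch (not from the library source) of selecting this type at run time; A, M and
   N are assumed to be provided by the application:

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   and running the program with -mat_type mpibaij -mat_block_size 2, followed by a call to
   MatMPIBAIJSetPreallocation() before inserting values.
*/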
3202 
3203 EXTERN_C_BEGIN
3204 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
3205 EXTERN_C_END
3206 
3207 EXTERN_C_BEGIN
3208 #undef __FUNCT__
3209 #define __FUNCT__ "MatCreate_MPIBAIJ"
3210 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3211 {
3212   Mat_MPIBAIJ    *b;
3213   PetscErrorCode ierr;
3214   PetscBool      flg;
3215 
3216   PetscFunctionBegin;
3217   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3218   B->data = (void*)b;
3219 
3220   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3221   B->assembled  = PETSC_FALSE;
3222 
3223   B->insertmode = NOT_SET_VALUES;
3224   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3225   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3226 
3227   /* build local table of row and column ownerships */
3228   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3229 
3230   /* build cache for off array entries formed */
3231   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3232   b->donotstash  = PETSC_FALSE;
3233   b->colmap      = PETSC_NULL;
3234   b->garray      = PETSC_NULL;
3235   b->roworiented = PETSC_TRUE;
3236 
3237   /* stuff used in block assembly */
3238   b->barray       = 0;
3239 
3240   /* stuff used for matrix vector multiply */
3241   b->lvec         = 0;
3242   b->Mvctx        = 0;
3243 
3244   /* stuff for MatGetRow() */
3245   b->rowindices   = 0;
3246   b->rowvalues    = 0;
3247   b->getrowactive = PETSC_FALSE;
3248 
3249   /* hash table stuff */
3250   b->ht           = 0;
3251   b->hd           = 0;
3252   b->ht_size      = 0;
3253   b->ht_flag      = PETSC_FALSE;
3254   b->ht_fact      = 0;
3255   b->ht_total_ct  = 0;
3256   b->ht_insert_ct = 0;
3257 
3258   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3259   b->ijonly       = PETSC_FALSE;
3260 
3261   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3262     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3263     if (flg) {
3264       PetscReal fact = 1.39;
3265       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3266       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3267       if (fact <= 1.0) fact = 1.39;
3268       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3269       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3270     }
3271   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3272 
3273 #if defined(PETSC_HAVE_MUMPS)
3274   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3275 #endif
3276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3277                                      "MatConvert_MPIBAIJ_MPIAdj",
3278                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3280                                      "MatConvert_MPIBAIJ_MPIAIJ",
3281                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3283                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3284                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3286                                      "MatStoreValues_MPIBAIJ",
3287                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3289                                      "MatRetrieveValues_MPIBAIJ",
3290                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3291   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3292                                      "MatGetDiagonalBlock_MPIBAIJ",
3293                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3294   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3295                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3296                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3297   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3298 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3299 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3300   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3301                                      "MatDiagonalScaleLocal_MPIBAIJ",
3302                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3303   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3304                                      "MatSetHashTableFactor_MPIBAIJ",
3305                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3306   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3307                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3308                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3309   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3310   PetscFunctionReturn(0);
3311 }
3312 EXTERN_C_END
3313 
3314 /*MC
3315    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3316 
3317    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3318    and MATMPIBAIJ otherwise.
3319 
3320    Options Database Keys:
3321 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3322 
3323   Level: beginner
3324 
3325 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3326 M*/
3327 
3328 #undef __FUNCT__
3329 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3330 /*@C
3331    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3332    (block compressed row).  For good matrix assembly performance
3333    the user should preallocate the matrix storage by setting the parameters
3334    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3335    performance can be increased by more than a factor of 50.
3336 
3337    Collective on Mat
3338 
3339    Input Parameters:
3340 +  B - the matrix
3341 .  bs   - size of block
3342 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3343            submatrix  (same for all local rows)
3344 .  d_nnz - array containing the number of block nonzeros in the various block rows
3345            in the diagonal portion of the local submatrix (possibly different for each block
3346            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3347            set it even if it is zero.
3348 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3349            submatrix (same for all local rows).
3350 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3351            off-diagonal portion of the local submatrix (possibly different for
3352            each block row) or PETSC_NULL.
3353 
3354    If the *_nnz parameter is given then the *_nz parameter is ignored
3355 
3356    Options Database Keys:
3357 +   -mat_block_size - size of the blocks to use
3358 -   -mat_use_hash_table <fact> - use a hash table (sized with the factor <fact>) to save memory when constructing the matrix
3359 
3360    Notes:
3361    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3362    then it must be used on all processors that share the object for that argument.
3363 
3364    Storage Information:
3365    For a square global matrix we define each processor's diagonal portion
3366    to be its local rows and the corresponding columns (a square submatrix);
3367    each processor's off-diagonal portion encompasses the remainder of the
3368    local matrix (a rectangular submatrix).
3369 
3370    The user can specify preallocated storage for the diagonal part of
3371    the local submatrix with either d_nz or d_nnz (not both).  Set
3372    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3373    memory allocation.  Likewise, specify preallocated storage for the
3374    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3375 
3376    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3377    the figure below we depict these three local rows and all columns (0-11).
3378 
3379 .vb
3380            0 1 2 3 4 5 6 7 8 9 10 11
3381           -------------------
3382    row 3  |  o o o d d d o o o o o o
3383    row 4  |  o o o d d d o o o o o o
3384    row 5  |  o o o d d d o o o o o o
3385           -------------------
3386 .ve
3387 
3388    Thus, any entries in the d locations are stored in the d (diagonal)
3389    submatrix, and any entries in the o locations are stored in the
3390    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3391    stored simply in the MATSEQBAIJ format for compressed row storage.
3392 
3393    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3394    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3395    In general, for PDE problems in which most nonzeros are near the diagonal,
3396    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3397    or you will get TERRIBLE performance; see the users' manual chapter on
3398    matrices.
3399 
3400    You can call MatGetInfo() to get information on how effective the preallocation was;
3401    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3402    You can also run with the option -info and look for messages with the string
3403    malloc in them to see if additional memory allocation was needed.
3404 
3405    Level: intermediate
3406 
3407 .keywords: matrix, block, aij, compressed row, sparse, parallel
3408 
3409 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR()
3410 @*/
3411 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3412 {
3413   PetscErrorCode ierr;
3414 
3415   PetscFunctionBegin;
3416   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3417   PetscValidType(B,1);
3418   PetscValidLogicalCollectiveInt(B,bs,2);
3419   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3420   PetscFunctionReturn(0);
3421 }
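
/*
   A hedged sketch of preallocating for the three rows pictured above, assuming bs = 3 so that
   rows 3-5 form a single local block row (A is assumed to be a MATMPIBAIJ matrix created by the
   caller): the d locations form 1 diagonal block (columns 3-5) and the o locations form 3
   off-diagonal blocks (columns 0-2, 6-8 and 9-11):

      PetscInt d_nnz[1] = {1};
      PetscInt o_nnz[1] = {3};
      ierr = MatMPIBAIJSetPreallocation(A,3,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/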
3422 
3423 #undef __FUNCT__
3424 #define __FUNCT__ "MatCreateBAIJ"
3425 /*@C
3426    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3427    (block compressed row).  For good matrix assembly performance
3428    the user should preallocate the matrix storage by setting the parameters
3429    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3430    performance can be increased by more than a factor of 50.
3431 
3432    Collective on MPI_Comm
3433 
3434    Input Parameters:
3435 +  comm - MPI communicator
3436 .  bs   - size of block
3437 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3438            This value should be the same as the local size used in creating the
3439            y vector for the matrix-vector product y = Ax.
3440 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3441            This value should be the same as the local size used in creating the
3442            x vector for the matrix-vector product y = Ax.
3443 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3444 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3445 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3446            submatrix  (same for all local rows)
3447 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3448            in the diagonal portion of the local submatrix (possibly different for each block
3449            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3450            and set it even if it is zero.
3451 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3452            submatrix (same for all local rows).
3453 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3454            off-diagonal portion of the local submatrix (possibly different for
3455            each block row) or PETSC_NULL.
3456 
3457    Output Parameter:
3458 .  A - the matrix
3459 
3460    Options Database Keys:
3461 +   -mat_block_size - size of the blocks to use
3462 -   -mat_use_hash_table <fact> - use a hash table (sized with the factor <fact>) to save memory when constructing the matrix
3463 
3464    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3465    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3466    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3467 
3468    Notes:
3469    If the *_nnz parameter is given then the *_nz parameter is ignored
3470 
3471    A nonzero block is any block that has 1 or more nonzeros in it
3472 
3473    The user MUST specify either the local or global matrix dimensions
3474    (possibly both).
3475 
3476    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3477    then it must be used on all processors that share the object for that argument.
3478 
3479    Storage Information:
3480    For a square global matrix we define each processor's diagonal portion
3481    to be its local rows and the corresponding columns (a square submatrix);
3482    each processor's off-diagonal portion encompasses the remainder of the
3483    local matrix (a rectangular submatrix).
3484 
3485    The user can specify preallocated storage for the diagonal part of
3486    the local submatrix with either d_nz or d_nnz (not both).  Set
3487    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3488    memory allocation.  Likewise, specify preallocated storage for the
3489    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3490 
3491    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3492    the figure below we depict these three local rows and all columns (0-11).
3493 
3494 .vb
3495            0 1 2 3 4 5 6 7 8 9 10 11
3496           -------------------
3497    row 3  |  o o o d d d o o o o o o
3498    row 4  |  o o o d d d o o o o o o
3499    row 5  |  o o o d d d o o o o o o
3500           -------------------
3501 .ve
3502 
3503    Thus, any entries in the d locations are stored in the d (diagonal)
3504    submatrix, and any entries in the o locations are stored in the
3505    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3506    stored simply in the MATSEQBAIJ format for compressed row storage.
3507 
3508    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3509    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3510    In general, for PDE problems in which most nonzeros are near the diagonal,
3511    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3512    or you will get TERRIBLE performance; see the users' manual chapter on
3513    matrices.
3514 
3515    Level: intermediate
3516 
3517 .keywords: matrix, block, aij, compressed row, sparse, parallel
3518 
3519 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3520 @*/
3521 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3522 {
3523   PetscErrorCode ierr;
3524   PetscMPIInt    size;
3525 
3526   PetscFunctionBegin;
3527   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3528   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3529   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3530   if (size > 1) {
3531     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3532     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3533   } else {
3534     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3535     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3536   }
3537   PetscFunctionReturn(0);
3538 }
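
/*
   Minimal creation sketch with made-up sizes: a square matrix with block size 2, 16 global rows
   (8 block rows), and roughly 3 diagonal plus 1 off-diagonal block nonzeros preallocated per
   block row; comm and ierr are assumed to be declared by the caller:

      Mat A;
      ierr = MatCreateBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,16,16,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
      ...  insert entries with MatSetValuesBlocked() or MatSetValues()  ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/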
3539 
3540 #undef __FUNCT__
3541 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3542 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3543 {
3544   Mat            mat;
3545   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3546   PetscErrorCode ierr;
3547   PetscInt       len=0;
3548 
3549   PetscFunctionBegin;
3550   *newmat       = 0;
3551   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3552   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3553   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3554   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3555 
3556   mat->factortype   = matin->factortype;
3557   mat->preallocated = PETSC_TRUE;
3558   mat->assembled    = PETSC_TRUE;
3559   mat->insertmode   = NOT_SET_VALUES;
3560 
3561   a      = (Mat_MPIBAIJ*)mat->data;
3562   mat->rmap->bs  = matin->rmap->bs;
3563   a->bs2   = oldmat->bs2;
3564   a->mbs   = oldmat->mbs;
3565   a->nbs   = oldmat->nbs;
3566   a->Mbs   = oldmat->Mbs;
3567   a->Nbs   = oldmat->Nbs;
3568 
3569   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3570   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3571 
3572   a->size         = oldmat->size;
3573   a->rank         = oldmat->rank;
3574   a->donotstash   = oldmat->donotstash;
3575   a->roworiented  = oldmat->roworiented;
3576   a->rowindices   = 0;
3577   a->rowvalues    = 0;
3578   a->getrowactive = PETSC_FALSE;
3579   a->barray       = 0;
3580   a->rstartbs     = oldmat->rstartbs;
3581   a->rendbs       = oldmat->rendbs;
3582   a->cstartbs     = oldmat->cstartbs;
3583   a->cendbs       = oldmat->cendbs;
3584 
3585   /* hash table stuff */
3586   a->ht           = 0;
3587   a->hd           = 0;
3588   a->ht_size      = 0;
3589   a->ht_flag      = oldmat->ht_flag;
3590   a->ht_fact      = oldmat->ht_fact;
3591   a->ht_total_ct  = 0;
3592   a->ht_insert_ct = 0;
3593 
3594   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3595   if (oldmat->colmap) {
3596 #if defined (PETSC_USE_CTABLE)
3597   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3598 #else
3599   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3600   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3601   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3602 #endif
3603   } else a->colmap = 0;
3604 
3605   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3606     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3607     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3608     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3609   } else a->garray = 0;
3610 
3611   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3612   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3613   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3614   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3615   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3616 
3617   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3618   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3619   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3620   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3621   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3622   *newmat = mat;
3623 
3624   PetscFunctionReturn(0);
3625 }
3626 
3627 #undef __FUNCT__
3628 #define __FUNCT__ "MatLoad_MPIBAIJ"
3629 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3630 {
3631   PetscErrorCode ierr;
3632   int            fd;
3633   PetscInt       i,nz,j,rstart,rend;
3634   PetscScalar    *vals,*buf;
3635   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3636   MPI_Status     status;
3637   PetscMPIInt    rank,size,maxnz;
3638   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3639   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3640   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3641   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3642   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3643   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3644 
3645   PetscFunctionBegin;
3646   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3647     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3648   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3649 
3650   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3651   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3652   if (!rank) {
3653     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3654     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3655     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3656   }
3657 
3658   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3659 
3660   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3661   M = header[1]; N = header[2];
3662 
3663   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3664   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3665   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3666 
3667   /* If global sizes are set, check if they are consistent with that given in the file */
3668   if (sizesset) {
3669     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3670   }
3671   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3672   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3673 
3674   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3675 
3676   /*
3677      This code adds extra rows to make sure the number of rows is
3678      divisible by the blocksize
3679   */
3680   Mbs        = M/bs;
3681   extra_rows = bs - M + bs*Mbs;
3682   if (extra_rows == bs) extra_rows = 0;
3683   else                  Mbs++;
3684   if (extra_rows && !rank) {
3685     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3686   }
3687 
3688   /* determine ownership of all rows */
3689   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3690     mbs        = Mbs/size + ((Mbs % size) > rank);
3691     m          = mbs*bs;
3692   } else { /* User set */
3693     m          = newmat->rmap->n;
3694     mbs        = m/bs;
3695   }
3696   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3697   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3698 
3699   /* process 0 needs enough room for process with most rows */
3700   if (!rank) {
3701     mmax = rowners[1];
3702     for (i=2; i<=size; i++) {
3703       mmax = PetscMax(mmax,rowners[i]);
3704     }
3705     mmax*=bs;
3706   } else mmax = m;
3707 
3708   rowners[0] = 0;
3709   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3710   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3711   rstart = rowners[rank];
3712   rend   = rowners[rank+1];
3713 
3714   /* distribute row lengths to all processors */
3715   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3716   if (!rank) {
3717     mend = m;
3718     if (size == 1) mend = mend - extra_rows;
3719     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3720     for (j=mend; j<m; j++) locrowlens[j] = 1;
3721     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3722     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3723     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3724     for (j=0; j<m; j++) {
3725       procsnz[0] += locrowlens[j];
3726     }
3727     for (i=1; i<size; i++) {
3728       mend = browners[i+1] - browners[i];
3729       if (i == size-1) mend = mend - extra_rows;
3730       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3731       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3732       /* calculate the number of nonzeros on each processor */
3733       for (j=0; j<browners[i+1]-browners[i]; j++) {
3734         procsnz[i] += rowlengths[j];
3735       }
3736       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3737     }
3738     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3739   } else {
3740     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3741   }
3742 
3743   if (!rank) {
3744     /* determine max buffer needed and allocate it */
3745     maxnz = procsnz[0];
3746     for (i=1; i<size; i++) {
3747       maxnz = PetscMax(maxnz,procsnz[i]);
3748     }
3749     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3750 
3751     /* read in my part of the matrix column indices  */
3752     nz     = procsnz[0];
3753     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3754     mycols = ibuf;
3755     if (size == 1)  nz -= extra_rows;
3756     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3757     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3758 
3759     /* read in the other processors' parts (except the last one) and ship them off */
3760     for (i=1; i<size-1; i++) {
3761       nz   = procsnz[i];
3762       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3763       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3764     }
3765     /* read in the stuff for the last proc */
3766     if (size != 1) {
3767       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3768       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3769       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3770       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3771     }
3772     ierr = PetscFree(cols);CHKERRQ(ierr);
3773   } else {
3774     /* determine buffer space needed for message */
3775     nz = 0;
3776     for (i=0; i<m; i++) {
3777       nz += locrowlens[i];
3778     }
3779     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3780     mycols = ibuf;
3781     /* receive message of column indices*/
3782     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3783     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3784     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3785   }
3786 
3787   /* loop over local rows, determining number of off diagonal entries */
3788   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3789   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3790   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3791   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3792   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3793   rowcount = 0; nzcount = 0;
3794   for (i=0; i<mbs; i++) {
3795     dcount  = 0;
3796     odcount = 0;
3797     for (j=0; j<bs; j++) {
3798       kmax = locrowlens[rowcount];
3799       for (k=0; k<kmax; k++) {
3800         tmp = mycols[nzcount++]/bs;
3801         if (!mask[tmp]) {
3802           mask[tmp] = 1;
3803           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3804           else masked1[dcount++] = tmp;
3805         }
3806       }
3807       rowcount++;
3808     }
3809 
3810     dlens[i]  = dcount;
3811     odlens[i] = odcount;
3812 
3813     /* zero out the mask elements we set */
3814     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3815     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3816   }
3817 
3818 
3819   if (!sizesset) {
3820     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3821   }
3822   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3823 
3824   if (!rank) {
3825     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3826     /* read in my part of the matrix numerical values  */
3827     nz = procsnz[0];
3828     vals = buf;
3829     mycols = ibuf;
3830     if (size == 1)  nz -= extra_rows;
3831     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3832     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3833 
3834     /* insert into matrix */
3835     jj      = rstart*bs;
3836     for (i=0; i<m; i++) {
3837       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3838       mycols += locrowlens[i];
3839       vals   += locrowlens[i];
3840       jj++;
3841     }
3842     /* read in other processors (except the last one) and ship out */
3843     for (i=1; i<size-1; i++) {
3844       nz   = procsnz[i];
3845       vals = buf;
3846       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3847       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3848     }
3849     /* the last proc */
3850     if (size != 1){
3851       nz   = procsnz[i] - extra_rows;
3852       vals = buf;
3853       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3854       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3855       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3856     }
3857     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3858   } else {
3859     /* receive numeric values */
3860     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3861 
3862     /* receive message of values*/
3863     vals   = buf;
3864     mycols = ibuf;
3865     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3866     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3867     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3868 
3869     /* insert into matrix */
3870     jj      = rstart*bs;
3871     for (i=0; i<m; i++) {
3872       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3873       mycols += locrowlens[i];
3874       vals   += locrowlens[i];
3875       jj++;
3876     }
3877   }
3878   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3879   ierr = PetscFree(buf);CHKERRQ(ierr);
3880   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3881   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3882   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3883   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3884   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3885   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3886 
3887   PetscFunctionReturn(0);
3888 }
3889 
3890 #undef __FUNCT__
3891 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3892 /*@
3893    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3894 
3895    Input Parameters:
3896 +  mat  - the matrix
3897 -  fact - factor used to compute the size of the hash table
3898 
3899    Not Collective, each process can use a different factor
3900 
3901    Level: advanced
3902 
3903   Notes:
3904    This can also be set by the command line option: -mat_use_hash_table <fact>
3905 
3906 .keywords: matrix, hashtable, factor, HT
3907 
3908 .seealso: MatSetOption()
3909 @*/
3910 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3911 {
3912   PetscErrorCode ierr;
3913 
3914   PetscFunctionBegin;
3915   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3916   PetscFunctionReturn(0);
3917 }
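
/*
   Illustrative sketch (A is assumed to be a caller-provided matrix of type MATMPIBAIJ): enable
   the hash table path and set the size factor programmatically, equivalent to the
   -mat_use_hash_table option; 1.6 below is an arbitrary factor greater than 1.0:

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/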
3918 
3919 EXTERN_C_BEGIN
3920 #undef __FUNCT__
3921 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3922 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3923 {
3924   Mat_MPIBAIJ *baij;
3925 
3926   PetscFunctionBegin;
3927   baij = (Mat_MPIBAIJ*)mat->data;
3928   baij->ht_fact = fact;
3929   PetscFunctionReturn(0);
3930 }
3931 EXTERN_C_END
3932 
3933 #undef __FUNCT__
3934 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3935 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3936 {
3937   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3938   PetscFunctionBegin;
3939   *Ad     = a->A;
3940   *Ao     = a->B;
3941   *colmap = a->garray;
3942   PetscFunctionReturn(0);
3943 }
3944 
3945 /*
3946     Special version for direct calls from Fortran (to eliminate two function call overheads
3947 */
3948 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3949 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3950 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3951 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3952 #endif
3953 
3954 #undef __FUNCT__
3955 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3956 /*@C
3957   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3958 
3959   Collective on Mat
3960 
3961   Input Parameters:
3962 + mat - the matrix
3963 . min - number of input rows
3964 . im - input rows
3965 . nin - number of input columns
3966 . in - input columns
3967 . v - numerical values input
3968 - addvin - INSERT_VALUES or ADD_VALUES
3969 
3970   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is unfortunate code duplication.
3971 
3972   Level: advanced
3973 
3974 .seealso:   MatSetValuesBlocked()
3975 @*/
3976 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3977 {
3978   /* convert input arguments to C version */
3979   Mat             mat = *matin;
3980   PetscInt        m = *min, n = *nin;
3981   InsertMode      addv = *addvin;
3982 
3983   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3984   const MatScalar *value;
3985   MatScalar       *barray=baij->barray;
3986   PetscBool       roworiented = baij->roworiented;
3987   PetscErrorCode  ierr;
3988   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3989   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3990   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3991 
3992   PetscFunctionBegin;
3993   /* tasks normally handled by MatSetValuesBlocked() */
3994   if (mat->insertmode == NOT_SET_VALUES) {
3995     mat->insertmode = addv;
3996   }
3997 #if defined(PETSC_USE_DEBUG)
3998   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3999   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4000 #endif
4001   if (mat->assembled) {
4002     mat->was_assembled = PETSC_TRUE;
4003     mat->assembled     = PETSC_FALSE;
4004   }
4005   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4006 
4007 
4008   if (!barray) {
4009     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
4010     baij->barray = barray;
4011   }
4012 
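  /* stepval is the number of scalar entries skipped between consecutive rows
     (row oriented) or columns (column oriented) of a single bs x bs block in v */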
4013   if (roworiented) {
4014     stepval = (n-1)*bs;
4015   } else {
4016     stepval = (m-1)*bs;
4017   }
4018   for (i=0; i<m; i++) {
4019     if (im[i] < 0) continue;
4020 #if defined(PETSC_USE_DEBUG)
4021     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4022 #endif
4023     if (im[i] >= rstart && im[i] < rend) {
4024       row = im[i] - rstart;
4025       for (j=0; j<n; j++) {
4026         /* If NumCol = 1 then a copy is not required */
4027         if ((roworiented) && (n == 1)) {
4028           barray = (MatScalar*)v + i*bs2;
4029         } else if((!roworiented) && (m == 1)) {
4030           barray = (MatScalar*)v + j*bs2;
4031         } else { /* Here a copy is required */
4032           if (roworiented) {
4033             value = v + i*(stepval+bs)*bs + j*bs;
4034           } else {
4035             value = v + j*(stepval+bs)*bs + i*bs;
4036           }
4037           for (ii=0; ii<bs; ii++,value+=stepval) {
4038             for (jj=0; jj<bs; jj++) {
4039               *barray++  = *value++;
4040             }
4041           }
4042           barray -=bs2;
4043         }
4044 
4045         if (in[j] >= cstart && in[j] < cend){
4046           col  = in[j] - cstart;
4047           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4048         }
4049         else if (in[j] < 0) continue;
4050 #if defined(PETSC_USE_DEBUG)
4051         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4052 #endif
4053         else {
4054           if (mat->was_assembled) {
4055             if (!baij->colmap) {
4056               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4057             }
4058 
4059 #if defined(PETSC_USE_DEBUG)
4060 #if defined (PETSC_USE_CTABLE)
4061             { PetscInt data;
4062               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4063               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4064             }
4065 #else
4066             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4067 #endif
4068 #endif
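            /* colmap maps a global block column to (local scalar column + 1);
               subtract 1 and divide by bs to recover the local block column of B */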
4069 #if defined (PETSC_USE_CTABLE)
4070             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4071             col  = (col - 1)/bs;
4072 #else
4073             col = (baij->colmap[in[j]] - 1)/bs;
4074 #endif
4075             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4076               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4077               col =  in[j];
4078             }
4079           }
4080           else col = in[j];
4081           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4082         }
4083       }
4084     } else {
4085       if (!baij->donotstash) {
4086         if (roworiented) {
4087           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4088         } else {
4089           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4090         }
4091       }
4092     }
4093   }
4094 
4095   /* task normally handled by MatSetValuesBlocked() */
4096   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4097   PetscFunctionReturn(0);
4098 }
4099 
4100 #undef __FUNCT__
4101 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4102 /*@
4103      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4104          in standard CSR format.
4105 
4106    Collective on MPI_Comm
4107 
4108    Input Parameters:
4109 +  comm - MPI communicator
4110 .  bs - the block size, only a block size of 1 is supported
4111 .  m - number of local rows (Cannot be PETSC_DECIDE)
4112 .  n - This value should be the same as the local size used in creating the
4113        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4114        calculated if N is given). For square matrices n is almost always m.
4115 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4116 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4117 .   i - row indices
4118 .   j - column indices
4119 -   a - matrix values
4120 
4121    Output Parameter:
4122 .   mat - the matrix
4123 
4124    Level: intermediate
4125 
4126    Notes:
4127        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4128      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4129      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4130 
4131        The i and j indices are 0 based; the entries of i are offsets into the local j array (that is, standard CSR row pointers for the local rows).
4132 
4133 .keywords: matrix, baij, block, compressed row, sparse, parallel
4134 
4135 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4136           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4137 @*/
4138 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4139 {
4140   PetscErrorCode ierr;
4141 
4143   PetscFunctionBegin;
4144   if (i[0]) {
4145     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4146   }
4147   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4148   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4149   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4150   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4151   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4152   PetscFunctionReturn(0);
4153 }
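
/*
   Example usage (a minimal sketch with made-up values, run on a single MPI rank):
   build a 2 x 3 matrix with block size 1 from local CSR arrays.

      PetscInt    ii[] = {0,2,4};            row pointers, ii[0] must be 0
      PetscInt    jj[] = {0,1,1,2};          global column indices
      PetscScalar aa[] = {1.0,2.0,3.0,4.0};
      Mat         A;
      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,3,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);

   Because the arrays are copied into the matrix, ii, jj, and aa may be modified or
   freed once the call returns.
*/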
4154