xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 3bf036e263fd78807e2931ff42d9ddcd8aae3fd4)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++){
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non-scalable way since the
81    length of colmap equals the number of global block columns.
82 */
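/*
   For example (illustrative values): with bs = 2 and garray = {3,7}, colmap[3] = 1 and
   colmap[7] = 3, i.e. colmap[global block column] = local column offset in B + 1, with 0
   meaning "not present"; a lookup is then performed as "col = colmap[gcol] - 1".
*/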
83 #undef __FUNCT__
84 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
85 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++){
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
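/*
   The two macros below insert a single scalar into the "diagonal" local matrix A or the
   "off-diagonal" local matrix B: they locate block column bcol inside block row brow by a
   short bisection followed by a linear scan, then either update entry (ridx,cidx) of the
   existing bs x bs block (stored column-major, offset bs*cidx + ridx) or shift the later
   blocks up and insert a new block.
*/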
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig){
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         } else if (in[j] < 0) continue;
229 #if defined(PETSC_USE_DEBUG)
230         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
231 #endif
232         else {
233           if (mat->was_assembled) {
234             if (!baij->colmap) {
235               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
236             }
237 #if defined (PETSC_USE_CTABLE)
238             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
239             col  = col - 1;
240 #else
241             col = baij->colmap[in[j]/bs] - 1;
242 #endif
243             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
244               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
245               col =  in[j];
246               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
247               B = baij->B;
248               b = (Mat_SeqBAIJ*)(B)->data;
249               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
250               ba=b->a;
251             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
252             else col += in[j]%bs;
253           } else col = in[j];
254           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
255           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
256           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
257         }
258       }
259     } else {
260       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
261       if (!baij->donotstash) {
262         mat->assembled = PETSC_FALSE;
263         if (roworiented) {
264           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
265         } else {
266           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
267         }
268       }
269     }
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
277 {
278   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
279   const PetscScalar *value;
280   MatScalar         *barray=baij->barray;
281   PetscBool         roworiented = baij->roworiented;
282   PetscErrorCode    ierr;
283   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
284   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
285   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
286 
287   PetscFunctionBegin;
288   if (!barray) {
289     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
290     baij->barray = barray;
291   }
292 
293   if (roworiented) {
294     stepval = (n-1)*bs;
295   } else {
296     stepval = (m-1)*bs;
297   }
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If n == 1 (row-oriented) or m == 1 (column-oriented) then a copy is not required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if ((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + (i*(stepval+bs) + j)*bs;
314           } else {
315 	    value = v + (j*(stepval+bs) + i)*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
318             for (jj=0; jj<bs; jj++) {
319               barray[jj]  = value[jj];
320             }
321             barray += bs;
322           }
323           barray -= bs2;
324         }
325 
326         if (in[j] >= cstart && in[j] < cend){
327           col  = in[j] - cstart;
328           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
329         }
330         else if (in[j] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
333 #endif
334         else {
335           if (mat->was_assembled) {
336             if (!baij->colmap) {
337               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
338             }
339 
340 #if defined(PETSC_USE_DEBUG)
341 #if defined (PETSC_USE_CTABLE)
342             { PetscInt data;
343               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
344               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
345             }
346 #else
347             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
348 #endif
349 #endif
350 #if defined (PETSC_USE_CTABLE)
351 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
352             col  = (col - 1)/bs;
353 #else
354             col = (baij->colmap[in[j]] - 1)/bs;
355 #endif
356             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
357               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
358               col =  in[j];
359             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
360           }
361           else col = in[j];
362           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363         }
364       }
365     } else {
366       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
367       if (!baij->donotstash) {
368         if (roworiented) {
369           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         } else {
371           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
372         }
373       }
374     }
375   }
376   PetscFunctionReturn(0);
377 }
378 
379 #define HASH_KEY 0.6180339887
380 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
381 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
382 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
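/*
   Multiplicative (golden-ratio) hashing: h = (PetscInt)(size*frac(key*HASH_KEY)).
   For example (illustrative values): with size = 11 and key = 5, key*HASH_KEY = 3.0902,
   its fractional part is 0.0902, and h = (PetscInt)(11*0.0902) = 0.  Collisions are
   resolved by linear probing in MatCreateHashTable_MPIBAIJ_Private() and in the
   *_HT set-values routines below.
*/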
383 #undef __FUNCT__
384 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
385 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
386 {
387   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
388   PetscBool      roworiented = baij->roworiented;
389   PetscErrorCode ierr;
390   PetscInt       i,j,row,col;
391   PetscInt       rstart_orig=mat->rmap->rstart;
392   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
393   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
394   PetscReal      tmp;
395   MatScalar      **HD = baij->hd,value;
396 #if defined(PETSC_USE_DEBUG)
397   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
398 #endif
399 
400   PetscFunctionBegin;
401   if (v) PetscValidScalarPointer(v,6);
402   for (i=0; i<m; i++) {
403 #if defined(PETSC_USE_DEBUG)
404     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
405     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
406 #endif
407     row = im[i];
408     if (row >= rstart_orig && row < rend_orig) {
409       for (j=0; j<n; j++) {
410         col = in[j];
411         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412         /* Look up into the hash table */
413         key = (row/bs)*Nbs+(col/bs)+1;
414         h1  = HASH(size,key,tmp);
415 
416 
417         idx = h1;
418 #if defined(PETSC_USE_DEBUG)
419         insert_ct++;
420         total_ct++;
421         if (HT[idx] != key) {
422           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
423           if (idx == size) {
424             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
425             if (idx == h1) {
426               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
427             }
428           }
429         }
430 #else
431         if (HT[idx] != key) {
432           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
433           if (idx == size) {
434             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
435             if (idx == h1) {
436               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
437             }
438           }
439         }
440 #endif
441         /* A HASH table entry is found, so insert the values at the correct address */
442         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
443         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
444       }
445     } else {
446       if (!baij->donotstash) {
447         if (roworiented) {
448           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
449         } else {
450           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
451         }
452       }
453     }
454   }
455 #if defined(PETSC_USE_DEBUG)
456   baij->ht_total_ct = total_ct;
457   baij->ht_insert_ct = insert_ct;
458 #endif
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
465 {
466   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
467   PetscBool         roworiented = baij->roworiented;
468   PetscErrorCode    ierr;
469   PetscInt          i,j,ii,jj,row,col;
470   PetscInt          rstart=baij->rstartbs;
471   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
472   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
473   PetscReal         tmp;
474   MatScalar         **HD = baij->hd,*baij_a;
475   const PetscScalar *v_t,*value;
476 #if defined(PETSC_USE_DEBUG)
477   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
478 #endif
479 
480   PetscFunctionBegin;
481 
482   if (roworiented) {
483     stepval = (n-1)*bs;
484   } else {
485     stepval = (m-1)*bs;
486   }
487   for (i=0; i<m; i++) {
488 #if defined(PETSC_USE_DEBUG)
489     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
490     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
491 #endif
492     row   = im[i];
493     v_t   = v + i*nbs2;
494     if (row >= rstart && row < rend) {
495       for (j=0; j<n; j++) {
496         col = in[j];
497 
498         /* Look up into the Hash Table */
499         key = row*Nbs+col+1;
500         h1  = HASH(size,key,tmp);
501 
502         idx = h1;
503 #if defined(PETSC_USE_DEBUG)
504         total_ct++;
505         insert_ct++;
506         if (HT[idx] != key) {
507           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
508           if (idx == size) {
509             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
510             if (idx == h1) {
511               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
512             }
513           }
514         }
515 #else
516         if (HT[idx] != key) {
517           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
518           if (idx == size) {
519             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
520             if (idx == h1) {
521               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
522             }
523           }
524         }
525 #endif
526         baij_a = HD[idx];
527         if (roworiented) {
528           /*value = v + i*(stepval+bs)*bs + j*bs;*/
529           /* value = v + (i*(stepval+bs)+j)*bs; */
530           value = v_t;
531           v_t  += bs;
532           if (addv == ADD_VALUES) {
533             for (ii=0; ii<bs; ii++,value+=stepval) {
534               for (jj=ii; jj<bs2; jj+=bs) {
535                 baij_a[jj]  += *value++;
536               }
537             }
538           } else {
539             for (ii=0; ii<bs; ii++,value+=stepval) {
540               for (jj=ii; jj<bs2; jj+=bs) {
541                 baij_a[jj]  = *value++;
542               }
543             }
544           }
545         } else {
546           value = v + j*(stepval+bs)*bs + i*bs;
547           if (addv == ADD_VALUES) {
548             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
549               for (jj=0; jj<bs; jj++) {
550                 baij_a[jj]  += *value++;
551               }
552             }
553           } else {
554             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
555               for (jj=0; jj<bs; jj++) {
556                 baij_a[jj]  = *value++;
557               }
558             }
559           }
560         }
561       }
562     } else {
563       if (!baij->donotstash) {
564         if (roworiented) {
565           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         } else {
567           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
568         }
569       }
570     }
571   }
572 #if defined(PETSC_USE_DEBUG)
573   baij->ht_total_ct = total_ct;
574   baij->ht_insert_ct = insert_ct;
575 #endif
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatGetValues_MPIBAIJ"
581 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
582 {
583   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
584   PetscErrorCode ierr;
585   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
586   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
587 
588   PetscFunctionBegin;
589   for (i=0; i<m; i++) {
590     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
591     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
592     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
593       row = idxm[i] - bsrstart;
594       for (j=0; j<n; j++) {
595         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
596         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
597         if (idxn[j] >= bscstart && idxn[j] < bscend){
598           col = idxn[j] - bscstart;
599           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
600         } else {
601           if (!baij->colmap) {
602             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
603           }
604 #if defined (PETSC_USE_CTABLE)
605           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
606           data --;
607 #else
608           data = baij->colmap[idxn[j]/bs]-1;
609 #endif
610           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
611           else {
612             col  = data + idxn[j]%bs;
613             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
614           }
615         }
616       }
617     } else {
618       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
619     }
620   }
621  PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatNorm_MPIBAIJ"
626 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
627 {
628   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
629   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
630   PetscErrorCode ierr;
631   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
632   PetscReal      sum = 0.0;
633   MatScalar      *v;
634 
635   PetscFunctionBegin;
636   if (baij->size == 1) {
637     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
638   } else {
639     if (type == NORM_FROBENIUS) {
640       v = amat->a;
641       nz = amat->nz*bs2;
642       for (i=0; i<nz; i++) {
643         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
644       }
645       v = bmat->a;
646       nz = bmat->nz*bs2;
647       for (i=0; i<nz; i++) {
648         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
649       }
650       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
651       *nrm = PetscSqrtReal(*nrm);
652     } else if (type == NORM_1) { /* max column sum */
653       PetscReal *tmp,*tmp2;
654       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
655       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
656       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
657       v = amat->a; jj = amat->j;
658       for (i=0; i<amat->nz; i++) {
659         for (j=0; j<bs; j++){
660           col = bs*(cstart + *jj) + j; /* column index */
661           for (row=0; row<bs; row++){
662             tmp[col] += PetscAbsScalar(*v);  v++;
663           }
664         }
665         jj++;
666       }
667       v = bmat->a; jj = bmat->j;
668       for (i=0; i<bmat->nz; i++) {
669         for (j=0; j<bs; j++){
670           col = bs*garray[*jj] + j;
671           for (row=0; row<bs; row++){
672             tmp[col] += PetscAbsScalar(*v); v++;
673           }
674         }
675         jj++;
676       }
677       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
678       *nrm = 0.0;
679       for (j=0; j<mat->cmap->N; j++) {
680         if (tmp2[j] > *nrm) *nrm = tmp2[j];
681       }
682       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
683     } else if (type == NORM_INFINITY) { /* max row sum */
684       PetscReal *sums;
685       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
686       sum = 0.0;
687       for (j=0; j<amat->mbs; j++) {
688         for (row=0; row<bs; row++) sums[row] = 0.0;
689         v = amat->a + bs2*amat->i[j];
690         nz = amat->i[j+1]-amat->i[j];
691         for (i=0; i<nz; i++) {
692           for (col=0; col<bs; col++){
693             for (row=0; row<bs; row++){
694               sums[row] += PetscAbsScalar(*v); v++;
695             }
696           }
697         }
698         v = bmat->a + bs2*bmat->i[j];
699         nz = bmat->i[j+1]-bmat->i[j];
700         for (i=0; i<nz; i++) {
701           for (col=0; col<bs; col++){
702             for (row=0; row<bs; row++){
703               sums[row] += PetscAbsScalar(*v); v++;
704             }
705           }
706         }
707         for (row=0; row<bs; row++){
708           if (sums[row] > sum) sum = sums[row];
709         }
710       }
711       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
712       ierr = PetscFree(sums);CHKERRQ(ierr);
713     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
714   }
715   PetscFunctionReturn(0);
716 }
717 
718 /*
719   Creates and fills the hash table.
720   This table is created only once.
721   If new entries need to be added to the matrix
722   then the hash table has to be destroyed and
723   recreated.
724 */
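/*
   Each table entry stores key = (block row)*Nbs + (block column) + 1 together with a
   pointer to the start of the corresponding bs*bs block of values; collisions are
   handled by linear probing, wrapping around the table with (h1+k)%ht_size.
*/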
725 #undef __FUNCT__
726 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
727 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
728 {
729   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
730   Mat            A = baij->A,B=baij->B;
731   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
732   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
733   PetscErrorCode ierr;
734   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
735   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
736   PetscInt       *HT,key;
737   MatScalar      **HD;
738   PetscReal      tmp;
739 #if defined(PETSC_USE_INFO)
740   PetscInt       ct=0,max=0;
741 #endif
742 
743   PetscFunctionBegin;
744   if (baij->ht) PetscFunctionReturn(0);
745 
746   baij->ht_size = (PetscInt)(factor*nz);
747   ht_size       = baij->ht_size;
748 
749   /* Allocate Memory for Hash Table */
750   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
751   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
752   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
753   HD   = baij->hd;
754   HT   = baij->ht;
755 
756   /* Loop Over A */
757   for (i=0; i<a->mbs; i++) {
758     for (j=ai[i]; j<ai[i+1]; j++) {
759       row = i+rstart;
760       col = aj[j]+cstart;
761 
762       key = row*Nbs + col + 1;
763       h1  = HASH(ht_size,key,tmp);
764       for (k=0; k<ht_size; k++){
765         if (!HT[(h1+k)%ht_size]) {
766           HT[(h1+k)%ht_size] = key;
767           HD[(h1+k)%ht_size] = a->a + j*bs2;
768           break;
769 #if defined(PETSC_USE_INFO)
770         } else {
771           ct++;
772 #endif
773         }
774       }
775 #if defined(PETSC_USE_INFO)
776       if (k> max) max = k;
777 #endif
778     }
779   }
780   /* Loop Over B */
781   for (i=0; i<b->mbs; i++) {
782     for (j=bi[i]; j<bi[i+1]; j++) {
783       row = i+rstart;
784       col = garray[bj[j]];
785       key = row*Nbs + col + 1;
786       h1  = HASH(ht_size,key,tmp);
787       for (k=0; k<ht_size; k++){
788         if (!HT[(h1+k)%ht_size]) {
789           HT[(h1+k)%ht_size] = key;
790           HD[(h1+k)%ht_size] = b->a + j*bs2;
791           break;
792 #if defined(PETSC_USE_INFO)
793         } else {
794           ct++;
795 #endif
796         }
797       }
798 #if defined(PETSC_USE_INFO)
799       if (k> max) max = k;
800 #endif
801     }
802   }
803 
804   /* Print Summary */
805 #if defined(PETSC_USE_INFO)
806   for (i=0,j=0; i<ht_size; i++) {
807     if (HT[i]) {j++;}
808   }
809   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
810 #endif
811   PetscFunctionReturn(0);
812 }
813 
814 #undef __FUNCT__
815 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
816 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
817 {
818   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
819   PetscErrorCode ierr;
820   PetscInt       nstash,reallocs;
821   InsertMode     addv;
822 
823   PetscFunctionBegin;
824   if (baij->donotstash || mat->nooffprocentries) {
825     PetscFunctionReturn(0);
826   }
827 
828   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
829   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
830   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
831   mat->insertmode = addv; /* in case this processor had no cache */
832 
833   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
834   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
835   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
836   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
837   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
838   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
841 
842 #undef __FUNCT__
843 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
844 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
845 {
846   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
847   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
848   PetscErrorCode ierr;
849   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
850   PetscInt       *row,*col;
851   PetscBool      r1,r2,r3,other_disassembled;
852   MatScalar      *val;
853   InsertMode     addv = mat->insertmode;
854   PetscMPIInt    n;
855 
856   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
857   PetscFunctionBegin;
858   if (!baij->donotstash && !mat->nooffprocentries) {
859     while (1) {
860       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
861       if (!flg) break;
862 
863       for (i=0; i<n;) {
864         /* Now identify the consecutive vals belonging to the same row */
865         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
866         if (j < n) ncols = j-i;
867         else       ncols = n-i;
868         /* Now assemble all these values with a single function call */
869         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
870         i = j;
871       }
872     }
873     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
874     /* Now process the block-stash. Since the values are stashed column-oriented,
875        clear the roworiented flags, and restore the original flags after the
876        MatSetValuesBlocked() calls */
877     r1 = baij->roworiented;
878     r2 = a->roworiented;
879     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
880     baij->roworiented = PETSC_FALSE;
881     a->roworiented    = PETSC_FALSE;
882     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
883     while (1) {
884       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
885       if (!flg) break;
886 
887       for (i=0; i<n;) {
888         /* Now identify the consecutive vals belonging to the same row */
889         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
890         if (j < n) ncols = j-i;
891         else       ncols = n-i;
892         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
893         i = j;
894       }
895     }
896     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
897     baij->roworiented = r1;
898     a->roworiented    = r2;
899     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
900   }
901 
902   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
903   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
904 
905   /* determine if any processor has disassembled, if so we must
906      also disassemble ourselves, in order that we may reassemble. */
907   /*
908      if the nonzero structure of submatrix B cannot change then we know that
909      no processor disassembled, thus we can skip this step
910   */
911   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
912     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
913     if (mat->was_assembled && !other_disassembled) {
914       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
915     }
916   }
917 
918   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
919     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
920   }
921   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
922   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
923   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
924 
925 #if defined(PETSC_USE_INFO)
926   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
927     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
928     baij->ht_total_ct  = 0;
929     baij->ht_insert_ct = 0;
930   }
931 #endif
932   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
933     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
934     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
935     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
936   }
937 
938   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
939   baij->rowvalues = 0;
940   PetscFunctionReturn(0);
941 }
942 
943 #undef __FUNCT__
944 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
945 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
946 {
947   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
948   PetscErrorCode    ierr;
949   PetscMPIInt       size = baij->size,rank = baij->rank;
950   PetscInt          bs = mat->rmap->bs;
951   PetscBool         iascii,isdraw;
952   PetscViewer       sviewer;
953   PetscViewerFormat format;
954 
955   PetscFunctionBegin;
956   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
957   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
958   if (iascii) {
959     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
960     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
961       MatInfo info;
962       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
963       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
964       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
965       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
966              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
967       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
968       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
969       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
970       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
971       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
972       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
973       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
974       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
975       PetscFunctionReturn(0);
976     } else if (format == PETSC_VIEWER_ASCII_INFO) {
977       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
978       PetscFunctionReturn(0);
979     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
980       PetscFunctionReturn(0);
981     }
982   }
983 
984   if (isdraw) {
985     PetscDraw       draw;
986     PetscBool  isnull;
987     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
988     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
989   }
990 
991   if (size == 1) {
992     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
993     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
994   } else {
995     /* assemble the entire matrix onto first processor. */
996     Mat         A;
997     Mat_SeqBAIJ *Aloc;
998     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
999     MatScalar   *a;
1000 
1001     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1002     /* Perhaps this should be the type of mat? */
1003     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1004     if (!rank) {
1005       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1006     } else {
1007       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1008     }
1009     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1010     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1011     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1012     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1013 
1014     /* copy over the A part */
1015     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1016     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1017     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1018 
1019     for (i=0; i<mbs; i++) {
1020       rvals[0] = bs*(baij->rstartbs + i);
1021       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1022       for (j=ai[i]; j<ai[i+1]; j++) {
1023         col = (baij->cstartbs+aj[j])*bs;
1024         for (k=0; k<bs; k++) {
1025           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1026           col++; a += bs;
1027         }
1028       }
1029     }
1030     /* copy over the B part */
1031     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1032     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1033     for (i=0; i<mbs; i++) {
1034       rvals[0] = bs*(baij->rstartbs + i);
1035       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1036       for (j=ai[i]; j<ai[i+1]; j++) {
1037         col = baij->garray[aj[j]]*bs;
1038         for (k=0; k<bs; k++) {
1039           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1040           col++; a += bs;
1041         }
1042       }
1043     }
1044     ierr = PetscFree(rvals);CHKERRQ(ierr);
1045     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1046     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1047     /*
1048        Everyone has to call to draw the matrix since the graphics waits are
1049        synchronized across all processors that share the PetscDraw object
1050     */
1051     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1052     if (!rank) {
1053       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1054     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1055       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1056       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1057     }
1058     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1059     ierr = MatDestroy(&A);CHKERRQ(ierr);
1060   }
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1066 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1067 {
1068   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1069   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1070   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1071   PetscErrorCode ierr;
1072   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1073   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1074   int            fd;
1075   PetscScalar    *column_values;
1076   FILE           *file;
1077   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1078   PetscInt       message_count,flowcontrolcount;
1079 
1080   PetscFunctionBegin;
1081   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1082   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1083   nz   = bs2*(A->nz + B->nz);
1084   rlen = mat->rmap->n;
1085   if (!rank) {
1086     header[0] = MAT_FILE_CLASSID;
1087     header[1] = mat->rmap->N;
1088     header[2] = mat->cmap->N;
1089     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1090     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1091     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1092     /* get largest number of rows any processor has */
1093     range = mat->rmap->range;
1094     for (i=1; i<size; i++) {
1095       rlen = PetscMax(rlen,range[i+1] - range[i]);
1096     }
1097   } else {
1098     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1099   }
1100 
1101   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1102   /* compute the number of nonzero blocks in each block row */
1103   for (i=0; i<a->mbs; i++) {
1104     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1105   }
1106   /* store the row lengths to the file */
1107   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1108   if (!rank) {
1109     MPI_Status status;
1110     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1111     rlen  = (range[1] - range[0])/bs;
1112     for (i=0; i<rlen; i++) {
1113       for (j=0; j<bs; j++) {
1114         row_lens[i*bs+j] = bs*crow_lens[i];
1115       }
1116     }
1117     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1118     for (i=1; i<size; i++) {
1119       rlen = (range[i+1] - range[i])/bs;
1120       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1121       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1122       for (k=0; k<rlen; k++) {
1123 	for (j=0; j<bs; j++) {
1124 	  row_lens[k*bs+j] = bs*crow_lens[k];
1125 	}
1126       }
1127       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1128     }
1129     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1130     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1131   } else {
1132     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1133     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1134     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1135   }
1136   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1137 
1138   /* load up the local column indices. Include them for all rows, not just one per block row, since process 0 does not have the
1139      information needed to reconstruct them for each row from a block row. This does require more communication, but still no more than
1140      the communication needed for the nonzero values  */
1141   nzmax = nz; /* space the largest processor needs */
1142   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1143   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1144   cnt  = 0;
1145   for (i=0; i<a->mbs; i++) {
1146     pcnt = cnt;
1147     for (j=B->i[i]; j<B->i[i+1]; j++) {
1148       if ( (col = garray[B->j[j]]) > cstart) break;
1149       for (l=0; l<bs; l++) {
1150 	column_indices[cnt++] = bs*col+l;
1151       }
1152     }
1153     for (k=A->i[i]; k<A->i[i+1]; k++) {
1154       for (l=0; l<bs; l++) {
1155         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1156       }
1157     }
1158     for (; j<B->i[i+1]; j++) {
1159       for (l=0; l<bs; l++) {
1160         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1161       }
1162     }
1163     len = cnt - pcnt;
1164     for (k=1; k<bs; k++) {
1165       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1166       cnt += len;
1167     }
1168   }
1169   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1170 
1171   /* store the columns to the file */
1172   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1173   if (!rank) {
1174     MPI_Status status;
1175     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     for (i=1; i<size; i++) {
1177       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1178       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1179       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1180       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1183   } else {
1184     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1185     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1186     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1187     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1188   }
1189   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1190 
1191   /* load up the numerical values */
1192   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1193   cnt = 0;
1194   for (i=0; i<a->mbs; i++) {
1195     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1196     for (j=B->i[i]; j<B->i[i+1]; j++) {
1197       if ( garray[B->j[j]] > cstart) break;
1198       for (l=0; l<bs; l++) {
1199         for (ll=0; ll<bs; ll++) {
1200 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1201         }
1202       }
1203       cnt += bs;
1204     }
1205     for (k=A->i[i]; k<A->i[i+1]; k++) {
1206       for (l=0; l<bs; l++) {
1207         for (ll=0; ll<bs; ll++) {
1208           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1209         }
1210       }
1211       cnt += bs;
1212     }
1213     for (; j<B->i[i+1]; j++) {
1214       for (l=0; l<bs; l++) {
1215         for (ll=0; ll<bs; ll++) {
1216 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1217         }
1218       }
1219       cnt += bs;
1220     }
1221     cnt += (bs-1)*rlen;
1222   }
1223   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1224 
1225   /* store the column values to the file */
1226   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1227   if (!rank) {
1228     MPI_Status status;
1229     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1230     for (i=1; i<size; i++) {
1231       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1232       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1233       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1234       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1235     }
1236     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1237   } else {
1238     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1239     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1240     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1241     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1242   }
1243   ierr = PetscFree(column_values);CHKERRQ(ierr);
1244 
1245   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1246   if (file) {
1247     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1248   }
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatView_MPIBAIJ"
1254 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1255 {
1256   PetscErrorCode ierr;
1257   PetscBool      iascii,isdraw,issocket,isbinary;
1258 
1259   PetscFunctionBegin;
1260   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1261   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1262   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1263   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1264   if (iascii || isdraw || issocket) {
1265     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1266   } else if (isbinary) {
1267     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1268   } else {
1269     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1270   }
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 #undef __FUNCT__
1275 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1276 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1277 {
1278   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1279   PetscErrorCode ierr;
1280 
1281   PetscFunctionBegin;
1282 #if defined(PETSC_USE_LOG)
1283   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1284 #endif
1285   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1286   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1287   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1288   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1289 #if defined (PETSC_USE_CTABLE)
1290   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1291 #else
1292   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1293 #endif
1294   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1295   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1296   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1297   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1298   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1299   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1300   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1301   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1302 
1303   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatMult_MPIBAIJ"
1318 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1319 {
1320   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1321   PetscErrorCode ierr;
1322   PetscInt       nt;
1323 
1324   PetscFunctionBegin;
1325   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1326   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1327   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1328   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1329   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1330   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1331   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1332   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
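/*
   Illustrative usage sketch (hypothetical names): applying an assembled MPIBAIJ
   matrix.  MatMult_MPIBAIJ() above overlaps the scatter of the needed off-process
   entries of xx with the multiply by the local diagonal block.

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);    (x conforms to the columns of A, y to the rows)
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);         (y = A*x)
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/
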
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1338 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1339 {
1340   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1341   PetscErrorCode ierr;
1342 
1343   PetscFunctionBegin;
1344   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1345   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1346   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1347   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1353 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1354 {
1355   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1356   PetscErrorCode ierr;
1357   PetscBool      merged;
1358 
1359   PetscFunctionBegin;
1360   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1361   /* do nondiagonal part */
1362   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1363   if (!merged) {
1364     /* send it on its way */
1365     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366     /* do local part */
1367     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1368     /* receive remote parts: note this assumes the values are not actually */
1369     /* inserted in yy until the next line */
1370     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1371   } else {
1372     /* do local part */
1373     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1374     /* send it on its way */
1375     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1376     /* values actually were received in the Begin() but we need to call this nop */
1377     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1378   }
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1384 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1385 {
1386   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1387   PetscErrorCode ierr;
1388 
1389   PetscFunctionBegin;
1390   /* do nondiagonal part */
1391   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1392   /* send it on its way */
1393   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1394   /* do local part */
1395   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1396   /* receive remote parts: note this assumes the values are not actually */
1397   /* inserted in zz until the next line, which is true for my implementation */
1398   /* but is perhaps not always true. */
1399   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 /*
1404   This only works correctly for square matrices where the subblock A->A is the
1405    diagonal block
1406 */
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1409 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1410 {
1411   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1412   PetscErrorCode ierr;
1413 
1414   PetscFunctionBegin;
1415   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1416   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
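/*
   Illustrative usage sketch (hypothetical names): extracting the diagonal of a
   square MPIBAIJ matrix into a vector with the same parallel row layout.

     Vec diag;
     ierr = MatGetVecs(A,PETSC_NULL,&diag);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,diag);CHKERRQ(ierr);
     ierr = VecDestroy(&diag);CHKERRQ(ierr);
*/
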
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatScale_MPIBAIJ"
1422 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1423 {
1424   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1429   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1435 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1436 {
1437   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1438   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1439   PetscErrorCode ierr;
1440   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1441   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1442   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1443 
1444   PetscFunctionBegin;
1445   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1446   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1447   mat->getrowactive = PETSC_TRUE;
1448 
1449   if (!mat->rowvalues && (idx || v)) {
1450     /*
1451         allocate enough space to hold information from the longest row.
1452     */
1453     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1454     PetscInt     max = 1,mbs = mat->mbs,tmp;
1455     for (i=0; i<mbs; i++) {
1456       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1457       if (max < tmp) { max = tmp; }
1458     }
1459     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1460   }
1461   lrow = row - brstart;
1462 
1463   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1464   if (!v)   {pvA = 0; pvB = 0;}
1465   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1466   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1467   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1468   nztot = nzA + nzB;
1469 
1470   cmap  = mat->garray;
1471   if (v  || idx) {
1472     if (nztot) {
1473       /* Sort by increasing column numbers, assuming A and B already sorted */
1474       PetscInt imark = -1;
1475       if (v) {
1476         *v = v_p = mat->rowvalues;
1477         for (i=0; i<nzB; i++) {
1478           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1479           else break;
1480         }
1481         imark = i;
1482         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1483         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1484       }
1485       if (idx) {
1486         *idx = idx_p = mat->rowindices;
1487         if (imark > -1) {
1488           for (i=0; i<imark; i++) {
1489             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1490           }
1491         } else {
1492           for (i=0; i<nzB; i++) {
1493             if (cmap[cworkB[i]/bs] < cstart)
1494               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1495             else break;
1496           }
1497           imark = i;
1498         }
1499         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1500         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1501       }
1502     } else {
1503       if (idx) *idx = 0;
1504       if (v)   *v   = 0;
1505     }
1506   }
1507   *nz = nztot;
1508   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1509   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1515 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1516 {
1517   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1518 
1519   PetscFunctionBegin;
1520   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1521   baij->getrowactive = PETSC_FALSE;
1522   PetscFunctionReturn(0);
1523 }
1524 
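/*
   Illustrative usage sketch (hypothetical names): MatGetRow()/MatRestoreRow() on an
   MPIBAIJ matrix may only be called for locally owned rows, and each MatGetRow()
   must be matched by a MatRestoreRow() before the next row is requested.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       (inspect ncols, cols[], vals[] here)
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
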
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1527 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1528 {
1529   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1530   PetscErrorCode ierr;
1531 
1532   PetscFunctionBegin;
1533   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1534   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1540 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1541 {
1542   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1543   Mat            A = a->A,B = a->B;
1544   PetscErrorCode ierr;
1545   PetscReal      isend[5],irecv[5];
1546 
1547   PetscFunctionBegin;
1548   info->block_size     = (PetscReal)matin->rmap->bs;
1549   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1550   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1551   isend[3] = info->memory;  isend[4] = info->mallocs;
1552   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1553   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1554   isend[3] += info->memory;  isend[4] += info->mallocs;
1555   if (flag == MAT_LOCAL) {
1556     info->nz_used      = isend[0];
1557     info->nz_allocated = isend[1];
1558     info->nz_unneeded  = isend[2];
1559     info->memory       = isend[3];
1560     info->mallocs      = isend[4];
1561   } else if (flag == MAT_GLOBAL_MAX) {
1562     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1563     info->nz_used      = irecv[0];
1564     info->nz_allocated = irecv[1];
1565     info->nz_unneeded  = irecv[2];
1566     info->memory       = irecv[3];
1567     info->mallocs      = irecv[4];
1568   } else if (flag == MAT_GLOBAL_SUM) {
1569     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1570     info->nz_used      = irecv[0];
1571     info->nz_allocated = irecv[1];
1572     info->nz_unneeded  = irecv[2];
1573     info->memory       = irecv[3];
1574     info->mallocs      = irecv[4];
1575   } else {
1576     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1577   }
1578   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1579   info->fill_ratio_needed = 0;
1580   info->factor_mallocs    = 0;
1581   PetscFunctionReturn(0);
1582 }
1583 
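/*
   Illustrative usage sketch (hypothetical names): querying global matrix statistics.
   For MAT_GLOBAL_SUM/MAT_GLOBAL_MAX the routine above reduces the five local
   quantities (nz_used, nz_allocated, nz_unneeded, memory, mallocs) over the
   communicator of the matrix.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
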
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1586 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1587 {
1588   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1589   PetscErrorCode ierr;
1590 
1591   PetscFunctionBegin;
1592   switch (op) {
1593   case MAT_NEW_NONZERO_LOCATIONS:
1594   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1595   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1596   case MAT_KEEP_NONZERO_PATTERN:
1597   case MAT_NEW_NONZERO_LOCATION_ERR:
1598     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1599     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1600     break;
1601   case MAT_ROW_ORIENTED:
1602     a->roworiented = flg;
1603     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1604     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1605     break;
1606   case MAT_NEW_DIAGONALS:
1607     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1608     break;
1609   case MAT_IGNORE_OFF_PROC_ENTRIES:
1610     a->donotstash = flg;
1611     break;
1612   case MAT_USE_HASH_TABLE:
1613     a->ht_flag = flg;
1614     break;
1615   case MAT_SYMMETRIC:
1616   case MAT_STRUCTURALLY_SYMMETRIC:
1617   case MAT_HERMITIAN:
1618   case MAT_SYMMETRY_ETERNAL:
1619     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1620     break;
1621   default:
1622     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1623   }
1624   PetscFunctionReturn(0);
1625 }
1626 
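/*
   Illustrative usage sketch (hypothetical names): options handled above that are
   specific to this implementation, set after preallocation but before assembly.

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/
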
1627 #undef __FUNCT__
1628 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1629 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1630 {
1631   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1632   Mat_SeqBAIJ    *Aloc;
1633   Mat            B;
1634   PetscErrorCode ierr;
1635   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1636   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1637   MatScalar      *a;
1638 
1639   PetscFunctionBegin;
1640   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1641   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1642     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1643     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1644     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1645     /* Do not know preallocation information, but must set block size */
1646     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1647   } else {
1648     B = *matout;
1649   }
1650 
1651   /* copy over the A part */
1652   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1653   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1654   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1655 
1656   for (i=0; i<mbs; i++) {
1657     rvals[0] = bs*(baij->rstartbs + i);
1658     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1659     for (j=ai[i]; j<ai[i+1]; j++) {
1660       col = (baij->cstartbs+aj[j])*bs;
1661       for (k=0; k<bs; k++) {
1662         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1663         col++; a += bs;
1664       }
1665     }
1666   }
1667   /* copy over the B part */
1668   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1669   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1670   for (i=0; i<mbs; i++) {
1671     rvals[0] = bs*(baij->rstartbs + i);
1672     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1673     for (j=ai[i]; j<ai[i+1]; j++) {
1674       col = baij->garray[aj[j]]*bs;
1675       for (k=0; k<bs; k++) {
1676         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1677         col++; a += bs;
1678       }
1679     }
1680   }
1681   ierr = PetscFree(rvals);CHKERRQ(ierr);
1682   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1683   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1684 
1685   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1686     *matout = B;
1687   } else {
1688     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1689   }
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 #undef __FUNCT__
1694 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1695 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1696 {
1697   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1698   Mat            a = baij->A,b = baij->B;
1699   PetscErrorCode ierr;
1700   PetscInt       s1,s2,s3;
1701 
1702   PetscFunctionBegin;
1703   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1704   if (rr) {
1705     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1706     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1707     /* Overlap communication with computation. */
1708     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1709   }
1710   if (ll) {
1711     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1712     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1713     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1714   }
1715   /* scale  the diagonal block */
1716   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1717 
1718   if (rr) {
1719     /* Do a scatter end and then right scale the off-diagonal block */
1720     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1721     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1722   }
1723 
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1730 {
1731   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1732   PetscErrorCode    ierr;
1733   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1734   PetscInt          i,*owners = A->rmap->range;
1735   PetscInt          *nprocs,j,idx,nsends,row;
1736   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1737   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1738   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1739   MPI_Comm          comm = ((PetscObject)A)->comm;
1740   MPI_Request       *send_waits,*recv_waits;
1741   MPI_Status        recv_status,*send_status;
1742   const PetscScalar *xx;
1743   PetscScalar       *bb;
1744 #if defined(PETSC_DEBUG)
1745   PetscBool         found = PETSC_FALSE;
1746 #endif
1747 
1748   PetscFunctionBegin;
1749   /*  first count number of contributors to each processor */
1750   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1751   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1752   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1753   j = 0;
1754   for (i=0; i<N; i++) {
1755     if (lastidx > (idx = rows[i])) j = 0;
1756     lastidx = idx;
1757     for (; j<size; j++) {
1758       if (idx >= owners[j] && idx < owners[j+1]) {
1759         nprocs[2*j]++;
1760         nprocs[2*j+1] = 1;
1761         owner[i] = j;
1762 #if defined(PETSC_DEBUG)
1763         found = PETSC_TRUE;
1764 #endif
1765         break;
1766       }
1767     }
1768 #if defined(PETSC_DEBUG)
1769     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1770     found = PETSC_FALSE;
1771 #endif
1772   }
1773   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1774 
1775   if (A->nooffproczerorows) {
1776     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1777     nrecvs = nsends;
1778     nmax   = N;
1779   } else {
1780     /* inform other processors of number of messages and max length*/
1781     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1782   }
1783 
1784   /* post receives:   */
1785   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1786   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1787   for (i=0; i<nrecvs; i++) {
1788     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1789   }
1790 
1791   /* do sends:
1792      1) starts[i] gives the starting index in svalues for stuff going to
1793      the ith processor
1794   */
1795   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1796   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1797   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1798   starts[0]  = 0;
1799   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1800   for (i=0; i<N; i++) {
1801     svalues[starts[owner[i]]++] = rows[i];
1802   }
1803 
1804   starts[0] = 0;
1805   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1806   count = 0;
1807   for (i=0; i<size; i++) {
1808     if (nprocs[2*i+1]) {
1809       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1810     }
1811   }
1812   ierr = PetscFree(starts);CHKERRQ(ierr);
1813 
1814   base = owners[rank];
1815 
1816   /*  wait on receives */
1817   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1818   count  = nrecvs;
1819   slen = 0;
1820   while (count) {
1821     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1822     /* unpack receives into our local space */
1823     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1824     source[imdex]  = recv_status.MPI_SOURCE;
1825     lens[imdex]    = n;
1826     slen          += n;
1827     count--;
1828   }
1829   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1830 
1831   /* move the data into the send scatter */
1832   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1833   count = 0;
1834   for (i=0; i<nrecvs; i++) {
1835     values = rvalues + i*nmax;
1836     for (j=0; j<lens[i]; j++) {
1837       lrows[count++] = values[j] - base;
1838     }
1839   }
1840   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1841   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1842   ierr = PetscFree(owner);CHKERRQ(ierr);
1843   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1844 
1845   /* fix right hand side if needed */
1846   if (x && b) {
1847     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1848     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1849     for (i=0; i<slen; i++) {
1850       bb[lrows[i]] = diag*xx[lrows[i]];
1851     }
1852     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1853     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1854   }
1855 
1856   /* actually zap the local rows */
1857   /*
1858         Zero the required rows. If the "diagonal block" of the matrix
1859      is square and the user wishes to set the diagonal we use separate
1860      code so that MatSetValues() is not called for each diagonal entry, which
1861      would allocate new memory, causing lots of mallocs and slowing things down.
1862 
1863   */
1864   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1865   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1866   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1867     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1868   } else if (diag != 0.0) {
1869     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1870     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1871        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1872     for (i=0; i<slen; i++) {
1873       row  = lrows[i] + rstart_bs;
1874       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1875     }
1876     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1877     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1878   } else {
1879     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1880   }
1881 
1882   ierr = PetscFree(lrows);CHKERRQ(ierr);
1883 
1884   /* wait on sends */
1885   if (nsends) {
1886     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1887     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1888     ierr = PetscFree(send_status);CHKERRQ(ierr);
1889   }
1890   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1891   ierr = PetscFree(svalues);CHKERRQ(ierr);
1892 
1893   PetscFunctionReturn(0);
1894 }
1895 
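/*
   Illustrative usage sketch (hypothetical names): zeroing rows of a parallel BAIJ
   matrix.  Rows are given by global index and may belong to other processes; when
   x and b are provided the routine also sets b[row] = diag*x[row] for each zeroed row.

     PetscInt    rows[2] = {0,5};
     PetscScalar diag    = 1.0;
     ierr = MatZeroRows(A,2,rows,diag,x,b);CHKERRQ(ierr);
*/
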
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1898 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1899 {
1900   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1901   PetscErrorCode ierr;
1902 
1903   PetscFunctionBegin;
1904   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatEqual_MPIBAIJ"
1912 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1913 {
1914   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1915   Mat            a,b,c,d;
1916   PetscBool      flg;
1917   PetscErrorCode ierr;
1918 
1919   PetscFunctionBegin;
1920   a = matA->A; b = matA->B;
1921   c = matB->A; d = matB->B;
1922 
1923   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1924   if (flg) {
1925     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1926   }
1927   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatCopy_MPIBAIJ"
1933 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1934 {
1935   PetscErrorCode ierr;
1936   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1937   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1938 
1939   PetscFunctionBegin;
1940   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1941   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1942     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1943   } else {
1944     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1945     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1946   }
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1952 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1953 {
1954   PetscErrorCode ierr;
1955 
1956   PetscFunctionBegin;
1957   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1963 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1964 {
1965   PetscErrorCode ierr;
1966   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1967   PetscBLASInt   bnz,one=1;
1968   Mat_SeqBAIJ    *x,*y;
1969 
1970   PetscFunctionBegin;
1971   if (str == SAME_NONZERO_PATTERN) {
1972     PetscScalar alpha = a;
1973     x = (Mat_SeqBAIJ *)xx->A->data;
1974     y = (Mat_SeqBAIJ *)yy->A->data;
1975     bnz = PetscBLASIntCast(x->nz);
1976     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1977     x = (Mat_SeqBAIJ *)xx->B->data;
1978     y = (Mat_SeqBAIJ *)yy->B->data;
1979     bnz = PetscBLASIntCast(x->nz);
1980     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1981   } else {
1982     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1983   }
1984   PetscFunctionReturn(0);
1985 }
1986 
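/*
   Illustrative usage sketch (hypothetical names): Y = Y + a*X.  When both matrices
   are known to share the same nonzero pattern, passing SAME_NONZERO_PATTERN lets
   the routine above apply BLAS axpy directly to the stored values of the diagonal
   and off-diagonal blocks.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
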
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1989 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1990 {
1991   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1996   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2002 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2003 {
2004   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2005   PetscErrorCode ierr;
2006 
2007   PetscFunctionBegin;
2008   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2009   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2015 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2016 {
2017   PetscErrorCode ierr;
2018   IS             iscol_local;
2019   PetscInt       csize;
2020 
2021   PetscFunctionBegin;
2022   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2023   if (call == MAT_REUSE_MATRIX) {
2024     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2025     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2026   } else {
2027     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2028   }
2029   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2030   if (call == MAT_INITIAL_MATRIX) {
2031     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2032     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2033   }
2034   PetscFunctionReturn(0);
2035 }
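
/*
   Illustrative usage sketch (hypothetical names): extracting a parallel submatrix.
   Each process supplies its rows and its share of the columns; the column IS is
   gathered internally with ISAllGather().  For repeated extraction with the same
   index sets pass MAT_REUSE_MATRIX so the cached "ISAllGather" and "SubMatrix"
   objects composed above are reused.

     Mat sub;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     (... change the values of A, keeping the same nonzero pattern ...)
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/
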
2036 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2039 /*
2040   Not great since it makes two copies of the submatrix: first a sequential SeqBAIJ
2041   on each process, then the final result built by concatenating the local matrices.
2042   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2043 */
2044 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2045 {
2046   PetscErrorCode ierr;
2047   PetscMPIInt    rank,size;
2048   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2049   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2050   Mat            M,Mreuse;
2051   MatScalar      *vwork,*aa;
2052   MPI_Comm       comm = ((PetscObject)mat)->comm;
2053   IS             isrow_new, iscol_new;
2054   PetscBool      idflag,allrows, allcols;
2055   Mat_SeqBAIJ    *aij;
2056 
2057 
2058   PetscFunctionBegin;
2059   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2060   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2061   /* The compression and expansion should be avoided. It does not report
2062      errors and might change the indices, hence it is buggy */
2063   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2064   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2065 
2066   /* Check for special case: each processor gets entire matrix columns */
2067   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2068   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2069   if (idflag && ncol == mat->cmap->N){
2070     allcols = PETSC_TRUE;
2071   } else {
2072     allcols = PETSC_FALSE;
2073   }
2074 
2075   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2076   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2077   if (idflag && nrow == mat->rmap->N){
2078     allrows = PETSC_TRUE;
2079   } else {
2080     allrows = PETSC_FALSE;
2081   }
2082   if (call ==  MAT_REUSE_MATRIX) {
2083     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2084     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2085     ierr  = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2086   } else {
2087     ierr   = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2088   }
2089   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2090   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2091   /*
2092       m - number of local rows
2093       n - number of columns (same on all processors)
2094       rstart - first row in new global matrix generated
2095   */
2096   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2097   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2098   m    = m/bs;
2099   n    = n/bs;
2100 
2101   if (call == MAT_INITIAL_MATRIX) {
2102     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2103     ii  = aij->i;
2104     jj  = aij->j;
2105 
2106     /*
2107         Determine the number of non-zeros in the diagonal and off-diagonal
2108         portions of the matrix in order to do correct preallocation
2109     */
2110 
2111     /* first get start and end of "diagonal" columns */
2112     if (csize == PETSC_DECIDE) {
2113       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2114       if (mglobal == n*bs) { /* square matrix */
2115         nlocal = m;
2116       } else {
2117         nlocal = n/size + ((n % size) > rank);
2118       }
2119     } else {
2120       nlocal = csize/bs;
2121     }
2122     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2123     rstart = rend - nlocal;
2124     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2125 
2126     /* next, compute all the lengths */
2127     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2128     olens = dlens + m;
2129     for (i=0; i<m; i++) {
2130       jend = ii[i+1] - ii[i];
2131       olen = 0;
2132       dlen = 0;
2133       for (j=0; j<jend; j++) {
2134         if (*jj < rstart || *jj >= rend) olen++;
2135         else dlen++;
2136         jj++;
2137       }
2138       olens[i] = olen;
2139       dlens[i] = dlen;
2140     }
2141     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2142     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2143     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2144     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2145     ierr = PetscFree(dlens);CHKERRQ(ierr);
2146   } else {
2147     PetscInt ml,nl;
2148 
2149     M = *newmat;
2150     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2151     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2152     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2153     /*
2154          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2155        rather than the slower MatSetValues().
2156     */
2157     M->was_assembled = PETSC_TRUE;
2158     M->assembled     = PETSC_FALSE;
2159   }
2160   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2161   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2162   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2163   ii  = aij->i;
2164   jj  = aij->j;
2165   aa  = aij->a;
2166   for (i=0; i<m; i++) {
2167     row   = rstart/bs + i;
2168     nz    = ii[i+1] - ii[i];
2169     cwork = jj;     jj += nz;
2170     vwork = aa;     aa += nz*bs*bs;
2171     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2172   }
2173 
2174   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2175   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2176   *newmat = M;
2177 
2178   /* save submatrix used in processor for next request */
2179   if (call ==  MAT_INITIAL_MATRIX) {
2180     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2181     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2182   }
2183 
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatPermute_MPIBAIJ"
2189 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2190 {
2191   MPI_Comm       comm,pcomm;
2192   PetscInt       first,rlocal_size,clocal_size,nrows;
2193   const PetscInt *rows;
2194   PetscMPIInt    size;
2195   IS             crowp,growp,irowp,lrowp,lcolp;
2196   PetscErrorCode ierr;
2197 
2198   PetscFunctionBegin;
2199   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2200   /* make a collective version of 'rowp' */
2201   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2202   if (pcomm==comm) {
2203     crowp = rowp;
2204   } else {
2205     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2206     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2207     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2208     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2209   }
2210   /* collect the global row permutation and invert it */
2211   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2212   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2213   if (pcomm!=comm) {
2214     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2215   }
2216   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2217   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2218   /* get the local target indices */
2219   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2220   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2221   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2222   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2223   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2224   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2225   /* the column permutation is so much easier;
2226      just make a local (gathered) version of 'colp' */
2227   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2228   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2229   if (size==1) {
2230     lcolp = colp;
2231   } else {
2232     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2233   }
2234   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2235   /* now we just get the submatrix */
2236   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2237   if (size>1) {
2238     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2239   }
2240   /* clean up */
2241   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2242   PetscFunctionReturn(0);
2243 }
2244 
2245 #undef __FUNCT__
2246 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2247 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2248 {
2249   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2250   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2251 
2252   PetscFunctionBegin;
2253   if (nghosts) { *nghosts = B->nbs;}
2254   if (ghosts) {*ghosts = baij->garray;}
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2262 /*
2263     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2264 */
2265 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2266 {
2267   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2268   PetscErrorCode        ierr;
2269   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2270   PetscInt              bs,i,n,nrows,j,k,m,ncols,col;
2271   const PetscInt        *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj;
2272   PetscInt              nis = iscoloring->n,nctot,*cols;
2273   PetscInt              *rowhit,M,cstart,cend,colb;
2274   PetscInt              *columnsforrow,l;
2275   IS                    *isa;
2276   PetscBool              done,flg;
2277   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2278   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2279 
2280   PetscFunctionBegin;
2281   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2282   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2283 
2284   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2285   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2286   M                = mat->rmap->n/bs;
2287   cstart           = mat->cmap->rstart/bs;
2288   cend             = mat->cmap->rend/bs;
2289   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2290   c->N             = mat->cmap->N/bs;
2291   c->m             = mat->rmap->n/bs;
2292   c->rstart        = mat->rmap->rstart/bs;
2293 
2294   c->ncolors       = nis;
2295   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2296   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2297   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2298   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2299   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2300   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2301 
2302   /* Allow access to data structures of local part of matrix */
2303   if (!baij->colmap) {
2304     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2305   }
2306   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2307   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2308 
2309   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2310   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2311 
2312   for (i=0; i<nis; i++) {
2313     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2314     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2315     c->ncolumns[i] = n;
2316     if (n) {
2317       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2318       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2319       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2320     } else {
2321       c->columns[i]  = 0;
2322     }
2323 
2324     if (ctype == IS_COLORING_GLOBAL){
2325       /* Determine the total (parallel) number of columns of this color */
2326       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2327       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2328 
2329       nn   = PetscMPIIntCast(n);
2330       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2331       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2332       if (!nctot) {
2333         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2334       }
2335 
2336       disp[0] = 0;
2337       for (j=1; j<size; j++) {
2338         disp[j] = disp[j-1] + ncolsonproc[j-1];
2339       }
2340 
2341       /* Get complete list of columns for color on each processor */
2342       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2343       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2344       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2345     } else if (ctype == IS_COLORING_GHOSTED){
2346       /* Determine local number of columns of this color on this process, including ghost points */
2347       nctot = n;
2348       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2349       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2350     } else {
2351       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2352     }
2353 
2354     /*
2355        Mark all rows affected by these columns
2356     */
2357     /* Temporary option to allow for debugging/testing */
2358     flg  = PETSC_FALSE;
2359     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2360     if (!flg) {/*-----------------------------------------------------------------------------*/
2361       /* crude, fast version */
2362       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2363       /* loop over columns*/
2364       for (j=0; j<nctot; j++) {
2365         if (ctype == IS_COLORING_GHOSTED) {
2366           col = ltog[cols[j]];
2367         } else {
2368           col  = cols[j];
2369         }
2370         if (col >= cstart && col < cend) {
2371           /* column is in diagonal block of matrix */
2372           rows = A_cj + A_ci[col-cstart];
2373           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2374         } else {
2375 #if defined (PETSC_USE_CTABLE)
2376           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2377           colb --;
2378 #else
2379           colb = baij->colmap[col] - 1;
2380 #endif
2381           if (colb == -1) {
2382             m = 0;
2383           } else {
2384             colb = colb/bs;
2385             rows = B_cj + B_ci[colb];
2386             m    = B_ci[colb+1] - B_ci[colb];
2387           }
2388         }
2389         /* loop over columns marking them in rowhit */
2390         for (k=0; k<m; k++) {
2391           rowhit[*rows++] = col + 1;
2392         }
2393       }
2394 
2395       /* count the number of hits */
2396       nrows = 0;
2397       for (j=0; j<M; j++) {
2398         if (rowhit[j]) nrows++;
2399       }
2400       c->nrows[i]         = nrows;
2401       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2402       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2403       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2404       nrows = 0;
2405       for (j=0; j<M; j++) {
2406         if (rowhit[j]) {
2407           c->rows[i][nrows]           = j;
2408           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2409           nrows++;
2410         }
2411       }
2412     } else {/*-------------------------------------------------------------------------------*/
2413       /* slow version, using rowhit as a linked list */
2414       PetscInt currentcol,fm,mfm;
2415       rowhit[M] = M;
2416       nrows     = 0;
2417       /* loop over columns*/
2418       for (j=0; j<nctot; j++) {
2419         if (ctype == IS_COLORING_GHOSTED) {
2420           col = ltog[cols[j]];
2421         } else {
2422           col  = cols[j];
2423         }
2424         if (col >= cstart && col < cend) {
2425           /* column is in diagonal block of matrix */
2426           rows = A_cj + A_ci[col-cstart];
2427           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2428         } else {
2429 #if defined (PETSC_USE_CTABLE)
2430           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2431           colb --;
2432 #else
2433           colb = baij->colmap[col] - 1;
2434 #endif
2435           if (colb == -1) {
2436             m = 0;
2437           } else {
2438             colb = colb/bs;
2439             rows = B_cj + B_ci[colb];
2440             m    = B_ci[colb+1] - B_ci[colb];
2441           }
2442         }
2443 
2444         /* loop over columns marking them in rowhit */
2445         fm    = M; /* fm points to first entry in linked list */
2446         for (k=0; k<m; k++) {
2447           currentcol = *rows++;
2448           /* is it already in the list? */
2449           do {
2450             mfm  = fm;
2451             fm   = rowhit[fm];
2452           } while (fm < currentcol);
2453           /* not in list so add it */
2454           if (fm != currentcol) {
2455             nrows++;
2456             columnsforrow[currentcol] = col;
2457             /* next three lines insert new entry into linked list */
2458             rowhit[mfm]               = currentcol;
2459             rowhit[currentcol]        = fm;
2460             fm                        = currentcol;
2461             /* fm points to present position in list since we know the columns are sorted */
2462           } else {
2463             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2464           }
2465         }
2466       }
2467       c->nrows[i]         = nrows;
2468       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2469       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2470       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2471       /* now store the linked list of rows into c->rows[i] */
2472       nrows = 0;
2473       fm    = rowhit[M];
2474       do {
2475         c->rows[i][nrows]            = fm;
2476         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2477         fm                           = rowhit[fm];
2478       } while (fm < M);
2479     } /* ---------------------------------------------------------------------------------------*/
2480     ierr = PetscFree(cols);CHKERRQ(ierr);
2481   }
2482 
2483   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2484   /*
2485        vscale will contain the "diagonal" on processor scalings followed by the off processor
2486   */
2487   if (ctype == IS_COLORING_GLOBAL) {
2488     PetscInt *garray;
2489     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2490     for (i=0; i<baij->B->cmap->n/bs; i++) {
2491       for (j=0; j<bs; j++) {
2492         garray[i*bs+j] = bs*baij->garray[i]+j;
2493       }
2494     }
2495     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2496     ierr = PetscFree(garray);CHKERRQ(ierr);
2497     CHKMEMQ;
2498     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2499     for (k=0; k<c->ncolors; k++) {
2500       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2501       for (l=0; l<c->nrows[k]; l++) {
2502         col = c->columnsforrow[k][l];
2503         if (col >= cstart && col < cend) {
2504           /* column is in diagonal block of matrix */
2505           colb = col - cstart;
2506         } else {
2507           /* column  is in "off-processor" part */
2508 #if defined (PETSC_USE_CTABLE)
2509           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2510           colb --;
2511 #else
2512           colb = baij->colmap[col] - 1;
2513 #endif
2514           colb = colb/bs;
2515           colb += cend - cstart;
2516         }
2517         c->vscaleforrow[k][l] = colb;
2518       }
2519     }
2520   } else if (ctype == IS_COLORING_GHOSTED) {
2521     /* Get gtol mapping */
2522     PetscInt N = mat->cmap->N, *gtol;
2523     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2524     for (i=0; i<N; i++) gtol[i] = -1;
2525     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2526 
2527     c->vscale = 0; /* will be created in MatFDColoringApply() */
2528     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2529     for (k=0; k<c->ncolors; k++) {
2530       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2531       for (l=0; l<c->nrows[k]; l++) {
2532         col = c->columnsforrow[k][l];      /* global column index */
2533         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2534       }
2535     }
2536     ierr = PetscFree(gtol);CHKERRQ(ierr);
2537   }
2538   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2539 
2540   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2541   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2542   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2543   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2544   CHKMEMQ;
2545   PetscFunctionReturn(0);
2546 }
2547 
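/*
   Illustrative usage sketch (hypothetical names; FormFunction and ctx are user code):
   typical call sequence that reaches the routine above when the Jacobian J is MPIBAIJ.

     ISColoring    iscoloring;
     MatFDColoring fdcoloring;
     ierr = MatGetColoring(J,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
     ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
     ierr = MatFDColoringSetFunction(fdcoloring,(PetscErrorCode (*)(void))FormFunction,ctx);CHKERRQ(ierr);
     ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
*/
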
2548 #undef __FUNCT__
2549 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2550 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2551 {
2552   Mat            B;
2553   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2554   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2555   Mat_SeqAIJ     *b;
2556   PetscErrorCode ierr;
2557   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2558   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2559   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2560 
2561   PetscFunctionBegin;
2562   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2563   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2564 
2565   /* ----------------------------------------------------------------
2566      Tell every processor the number of nonzeros per row
2567   */
2568   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2569   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2570     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2571   }
2572   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2573   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2574   displs     = recvcounts + size;
2575   for (i=0; i<size; i++) {
2576     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2577     displs[i]     = A->rmap->range[i]/bs;
2578   }
2579 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2580   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2581 #else
2582   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2583 #endif
2584   /* ---------------------------------------------------------------
2585      Create the sequential matrix of the same type as the local block diagonal
2586   */
2587   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2588   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2589   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2590   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2591   b = (Mat_SeqAIJ *)B->data;
2592 
2593   /*--------------------------------------------------------------------
2594     Copy my part of matrix column indices over
2595   */
2596   sendcount  = ad->nz + bd->nz;
2597   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2598   a_jsendbuf = ad->j;
2599   b_jsendbuf = bd->j;
2600   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2601   cnt        = 0;
2602   for (i=0; i<n; i++) {
2603 
2604     /* put in lower diagonal portion */
2605     m = bd->i[i+1] - bd->i[i];
2606     while (m > 0) {
2607       /* is it above diagonal (in bd (compressed) numbering) */
2608       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2609       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2610       m--;
2611     }
2612 
2613     /* put in diagonal portion */
2614     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2615       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2616     }
2617 
2618     /* put in upper diagonal portion */
2619     while (m-- > 0) {
2620       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2621     }
2622   }
2623   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2624 
2625   /*--------------------------------------------------------------------
2626     Gather all column indices to all processors
2627   */
2628   for (i=0; i<size; i++) {
2629     recvcounts[i] = 0;
2630     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2631       recvcounts[i] += lens[j];
2632     }
2633   }
2634   displs[0]  = 0;
2635   for (i=1; i<size; i++) {
2636     displs[i] = displs[i-1] + recvcounts[i-1];
2637   }
2638 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2639   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2640 #else
2641   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2642 #endif
2643   /*--------------------------------------------------------------------
2644     Assemble the matrix into usable form (note numerical values not yet set)
2645   */
2646   /* set the b->ilen (length of each row) values */
2647   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2648   /* set the b->i indices */
2649   b->i[0] = 0;
2650   for (i=1; i<=A->rmap->N/bs; i++) {
2651     b->i[i] = b->i[i-1] + lens[i-1];
2652   }
2653   ierr = PetscFree(lens);CHKERRQ(ierr);
2654   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2655   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2656   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2657 
2658   if (A->symmetric){
2659     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2660   } else if (A->hermitian) {
2661     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2662   } else if (A->structurally_symmetric) {
2663     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2664   }
2665   *newmat = B;
2666   PetscFunctionReturn(0);
2667 }
2668 
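/*
    MatSOR_MPIBAIJ - Runs SOR locally on each process: ghost values of xx are
  scattered into lvec, the off-diagonal block B is used only to update the
  right-hand side (bb1 = bb - B*x), and the requested forward, backward, or
  symmetric sweep is then applied to the local diagonal block A.
*/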
2669 #undef __FUNCT__
2670 #define __FUNCT__ "MatSOR_MPIBAIJ"
2671 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2672 {
2673   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2674   PetscErrorCode ierr;
2675   Vec            bb1 = 0;
2676 
2677   PetscFunctionBegin;
2678   if (flag == SOR_APPLY_UPPER) {
2679     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2680     PetscFunctionReturn(0);
2681   }
2682 
2683   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2684     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2685   }
2686 
2687   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2688     if (flag & SOR_ZERO_INITIAL_GUESS) {
2689       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2690       its--;
2691     }
2692 
2693     while (its--) {
2694       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2695       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2696 
2697       /* update rhs: bb1 = bb - B*x */
2698       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2699       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2700 
2701       /* local sweep */
2702       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2703     }
2704   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2705     if (flag & SOR_ZERO_INITIAL_GUESS) {
2706       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2707       its--;
2708     }
2709     while (its--) {
2710       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2711       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2712 
2713       /* update rhs: bb1 = bb - B*x */
2714       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2715       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2716 
2717       /* local sweep */
2718       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2719     }
2720   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2721     if (flag & SOR_ZERO_INITIAL_GUESS) {
2722       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2723       its--;
2724     }
2725     while (its--) {
2726       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2727       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2728 
2729       /* update rhs: bb1 = bb - B*x */
2730       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2731       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2732 
2733       /* local sweep */
2734       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2735     }
2736   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2737 
2738   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2739   PetscFunctionReturn(0);
2740 }
2741 
2742 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2743 
2744 #undef __FUNCT__
2745 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2746 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2747 {
2748   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2749   PetscErrorCode ierr;
2750 
2751   PetscFunctionBegin;
2752   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2753   PetscFunctionReturn(0);
2754 }
2755 
2756 
2757 /* -------------------------------------------------------------------*/
2758 static struct _MatOps MatOps_Values = {
2759        MatSetValues_MPIBAIJ,
2760        MatGetRow_MPIBAIJ,
2761        MatRestoreRow_MPIBAIJ,
2762        MatMult_MPIBAIJ,
2763 /* 4*/ MatMultAdd_MPIBAIJ,
2764        MatMultTranspose_MPIBAIJ,
2765        MatMultTransposeAdd_MPIBAIJ,
2766        0,
2767        0,
2768        0,
2769 /*10*/ 0,
2770        0,
2771        0,
2772        MatSOR_MPIBAIJ,
2773        MatTranspose_MPIBAIJ,
2774 /*15*/ MatGetInfo_MPIBAIJ,
2775        MatEqual_MPIBAIJ,
2776        MatGetDiagonal_MPIBAIJ,
2777        MatDiagonalScale_MPIBAIJ,
2778        MatNorm_MPIBAIJ,
2779 /*20*/ MatAssemblyBegin_MPIBAIJ,
2780        MatAssemblyEnd_MPIBAIJ,
2781        MatSetOption_MPIBAIJ,
2782        MatZeroEntries_MPIBAIJ,
2783 /*24*/ MatZeroRows_MPIBAIJ,
2784        0,
2785        0,
2786        0,
2787        0,
2788 /*29*/ MatSetUp_MPIBAIJ,
2789        0,
2790        0,
2791        0,
2792        0,
2793 /*34*/ MatDuplicate_MPIBAIJ,
2794        0,
2795        0,
2796        0,
2797        0,
2798 /*39*/ MatAXPY_MPIBAIJ,
2799        MatGetSubMatrices_MPIBAIJ,
2800        MatIncreaseOverlap_MPIBAIJ,
2801        MatGetValues_MPIBAIJ,
2802        MatCopy_MPIBAIJ,
2803 /*44*/ 0,
2804        MatScale_MPIBAIJ,
2805        0,
2806        0,
2807        0,
2808 /*49*/ 0,
2809        0,
2810        0,
2811        0,
2812        0,
2813 /*54*/ MatFDColoringCreate_MPIBAIJ,
2814        0,
2815        MatSetUnfactored_MPIBAIJ,
2816        MatPermute_MPIBAIJ,
2817        MatSetValuesBlocked_MPIBAIJ,
2818 /*59*/ MatGetSubMatrix_MPIBAIJ,
2819        MatDestroy_MPIBAIJ,
2820        MatView_MPIBAIJ,
2821        0,
2822        0,
2823 /*64*/ 0,
2824        0,
2825        0,
2826        0,
2827        0,
2828 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2829        0,
2830        0,
2831        0,
2832        0,
2833 /*74*/ 0,
2834        MatFDColoringApply_BAIJ,
2835        0,
2836        0,
2837        0,
2838 /*79*/ 0,
2839        0,
2840        0,
2841        0,
2842        MatLoad_MPIBAIJ,
2843 /*84*/ 0,
2844        0,
2845        0,
2846        0,
2847        0,
2848 /*89*/ 0,
2849        0,
2850        0,
2851        0,
2852        0,
2853 /*94*/ 0,
2854        0,
2855        0,
2856        0,
2857        0,
2858 /*99*/ 0,
2859        0,
2860        0,
2861        0,
2862        0,
2863 /*104*/0,
2864        MatRealPart_MPIBAIJ,
2865        MatImaginaryPart_MPIBAIJ,
2866        0,
2867        0,
2868 /*109*/0,
2869        0,
2870        0,
2871        0,
2872        0,
2873 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2874        0,
2875        MatGetGhosts_MPIBAIJ,
2876        0,
2877        0,
2878 /*119*/0,
2879        0,
2880        0,
2881        0,
2882        0,
2883 /*124*/0,
2884        0,
2885        MatInvertBlockDiagonal_MPIBAIJ
2886 };
2887 
2888 EXTERN_C_BEGIN
2889 #undef __FUNCT__
2890 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2891 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2892 {
2893   PetscFunctionBegin;
2894   *a = ((Mat_MPIBAIJ *)A->data)->A;
2895   PetscFunctionReturn(0);
2896 }
2897 EXTERN_C_END
2898 
2899 EXTERN_C_BEGIN
2900 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2901 EXTERN_C_END
2902 
2903 EXTERN_C_BEGIN
2904 #undef __FUNCT__
2905 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2906 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2907 {
2908   PetscInt       m,rstart,cstart,cend;
2909   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2910   const PetscInt *JJ=0;
2911   PetscScalar    *values=0;
2912   PetscErrorCode ierr;
2913 
2914   PetscFunctionBegin;
2915   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2916   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2917   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2918   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2919   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2920   m      = B->rmap->n/bs;
2921   rstart = B->rmap->rstart/bs;
2922   cstart = B->cmap->rstart/bs;
2923   cend   = B->cmap->rend/bs;
2924 
2925   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2926   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2927   for (i=0; i<m; i++) {
2928     nz = ii[i+1] - ii[i];
2929     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2930     nz_max = PetscMax(nz_max,nz);
2931     JJ  = jj + ii[i];
2932     for (j=0; j<nz; j++) {
2933       if (*JJ >= cstart) break;
2934       JJ++;
2935     }
2936     d = 0;
2937     for (; j<nz; j++) {
2938       if (*JJ++ >= cend) break;
2939       d++;
2940     }
2941     d_nnz[i] = d;
2942     o_nnz[i] = nz - d;
2943   }
2944   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2945   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2946 
2947   values = (PetscScalar*)V;
2948   if (!values) {
2949     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2950     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2951   }
2952   for (i=0; i<m; i++) {
2953     PetscInt          row    = i + rstart;
2954     PetscInt          ncols  = ii[i+1] - ii[i];
2955     const PetscInt    *icols = jj + ii[i];
2956     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2957     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2958   }
2959 
2960   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2961   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2962   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2963   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
2966 EXTERN_C_END
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2970 /*@C
2971    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2972    (block compressed row).
2973 
2974    Collective on MPI_Comm
2975 
2976    Input Parameters:
2977 +  A - the matrix
2978 .  bs - the block size
2979 .  i - the indices into j for the start of each local row (starts with zero)
2980 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2981 -  v - optional values in the matrix
2982 
2983    Level: developer
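
   Example:
   A sketch of a typical call, assuming ii, jj, and vv hold this process's block
   rows in block CSR form, with bs*bs values stored per block in vv:
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,vv);
.ve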
2984 
2985 .keywords: matrix, block, aij, compressed row, sparse, parallel
2986 
2987 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateBAIJ(), MATMPIBAIJ
2988 @*/
2989 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2990 {
2991   PetscErrorCode ierr;
2992 
2993   PetscFunctionBegin;
2994   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2995   PetscValidType(B,1);
2996   PetscValidLogicalCollectiveInt(B,bs,2);
2997   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2998   PetscFunctionReturn(0);
2999 }
3000 
3001 EXTERN_C_BEGIN
3002 #undef __FUNCT__
3003 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3004 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3005 {
3006   Mat_MPIBAIJ    *b;
3007   PetscErrorCode ierr;
3008   PetscInt       i;
3009   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3010 
3011   PetscFunctionBegin;
3012   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3013   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3014 
3015   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3016   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3017   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3018   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3019 
3020   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3021   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3022   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3023   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3024   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3025 
3026   if (d_nnz) {
3027     for (i=0; i<B->rmap->n/bs; i++) {
3028       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3029     }
3030   }
3031   if (o_nnz) {
3032     for (i=0; i<B->rmap->n/bs; i++) {
3033       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3034     }
3035   }
3036 
3037   b = (Mat_MPIBAIJ*)B->data;
3038   b->bs2 = bs*bs;
3039   b->mbs = B->rmap->n/bs;
3040   b->nbs = B->cmap->n/bs;
3041   b->Mbs = B->rmap->N/bs;
3042   b->Nbs = B->cmap->N/bs;
3043 
3044   for (i=0; i<=b->size; i++) {
3045     b->rangebs[i] = B->rmap->range[i]/bs;
3046   }
3047   b->rstartbs = B->rmap->rstart/bs;
3048   b->rendbs   = B->rmap->rend/bs;
3049   b->cstartbs = B->cmap->rstart/bs;
3050   b->cendbs   = B->cmap->rend/bs;
3051 
3052   if (!B->preallocated) {
3053     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3054     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3055     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3056     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3057     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3058     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3059     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3060     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3061     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3062   }
3063 
3064   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3065   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3066   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3067   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3068   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3069   B->preallocated = PETSC_TRUE;
3070   PetscFunctionReturn(0);
3071 }
3072 EXTERN_C_END
3073 
3074 EXTERN_C_BEGIN
3075 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3076 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3077 EXTERN_C_END
3078 
3079 
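/*
    MatConvert_MPIBAIJ_MPIAdj - Builds the adjacency graph of the block structure:
  for each block row the off-diagonal (garray-mapped) and diagonal block column
  indices are merged in increasing global order, and the diagonal entry itself
  is dropped.
*/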
3080 EXTERN_C_BEGIN
3081 #undef __FUNCT__
3082 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3083 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3084 {
3085   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3086   PetscErrorCode ierr;
3087   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3088   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3089   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3090 
3091   PetscFunctionBegin;
3092   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3093   ii[0] = 0;
3094   CHKMEMQ;
3095   for (i=0; i<M; i++) {
3096     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3097     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3098     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3099     /* remove one from the count if the row has a diagonal entry */
3100     for (j=id[i]; j<id[i+1]; j++) {
3101       if (jd[j] == i) {ii[i+1]--;break;}
3102     }
3103     CHKMEMQ;
3104   }
3105   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3106   cnt = 0;
3107   for (i=0; i<M; i++) {
3108     for (j=io[i]; j<io[i+1]; j++) {
3109       if (garray[jo[j]] > rstart) break;
3110       jj[cnt++] = garray[jo[j]];
3111       CHKMEMQ;
3112     }
3113     for (k=id[i]; k<id[i+1]; k++) {
3114       if (jd[k] != i) {
3115         jj[cnt++] = rstart + jd[k];
3116         CHKMEMQ;
3117       }
3118     }
3119     for (;j<io[i+1]; j++) {
3120       jj[cnt++] = garray[jo[j]];
3121       CHKMEMQ;
3122     }
3123   }
3124   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3125   PetscFunctionReturn(0);
3126 }
3127 EXTERN_C_END
3128 
3129 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3130 EXTERN_C_BEGIN
3131 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3132 EXTERN_C_END
3133 
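/*
    MatConvert_MPIBAIJ_MPIAIJ - Disassembles the parallel BAIJ matrix, converts
  its local diagonal and off-diagonal SeqBAIJ blocks to SeqAIJ, and places them
  in a freshly created MPIAIJ matrix with the same parallel layout.
*/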
3134 EXTERN_C_BEGIN
3135 #undef __FUNCT__
3136 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3137 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3138 {
3139   PetscErrorCode ierr;
3140   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3141   Mat            B;
3142   Mat_MPIAIJ     *b;
3143 
3144   PetscFunctionBegin;
3145   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3146 
3147   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3148   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3149   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3150   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3151   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3152   b = (Mat_MPIAIJ*) B->data;
3153 
3154   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3155   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3156   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3157   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3158   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3159   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3160   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3163   if (reuse == MAT_REUSE_MATRIX) {
3164     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3165   } else {
3166    *newmat = B;
3167   }
3168   PetscFunctionReturn(0);
3169 }
3170 EXTERN_C_END
3171 
3172 EXTERN_C_BEGIN
3173 #if defined(PETSC_HAVE_MUMPS)
3174 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3175 #endif
3176 EXTERN_C_END
3177 
3178 /*MC
3179    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3180 
3181    Options Database Keys:
3182 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3183 . -mat_block_size <bs> - set the blocksize used to store the matrix
3184 - -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory, with the table size scaled by <fact>
3185 
3186   Level: beginner
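
   Example:
   One way to construct an MPIBAIJ matrix (a sketch; error checking omitted):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve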
3187 
3188 .seealso: MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3189 M*/
3190 
3191 EXTERN_C_BEGIN
3192 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3193 EXTERN_C_END
3194 
3195 EXTERN_C_BEGIN
3196 #undef __FUNCT__
3197 #define __FUNCT__ "MatCreate_MPIBAIJ"
3198 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3199 {
3200   Mat_MPIBAIJ    *b;
3201   PetscErrorCode ierr;
3202   PetscBool      flg;
3203 
3204   PetscFunctionBegin;
3205   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3206   B->data = (void*)b;
3207 
3208   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3209   B->assembled  = PETSC_FALSE;
3210 
3211   B->insertmode = NOT_SET_VALUES;
3212   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3213   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3214 
3215   /* build local table of row and column ownerships */
3216   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3217 
3218   /* build cache for off array entries formed */
3219   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3220   b->donotstash  = PETSC_FALSE;
3221   b->colmap      = PETSC_NULL;
3222   b->garray      = PETSC_NULL;
3223   b->roworiented = PETSC_TRUE;
3224 
3225   /* stuff used in block assembly */
3226   b->barray       = 0;
3227 
3228   /* stuff used for matrix vector multiply */
3229   b->lvec         = 0;
3230   b->Mvctx        = 0;
3231 
3232   /* stuff for MatGetRow() */
3233   b->rowindices   = 0;
3234   b->rowvalues    = 0;
3235   b->getrowactive = PETSC_FALSE;
3236 
3237   /* hash table stuff */
3238   b->ht           = 0;
3239   b->hd           = 0;
3240   b->ht_size      = 0;
3241   b->ht_flag      = PETSC_FALSE;
3242   b->ht_fact      = 0;
3243   b->ht_total_ct  = 0;
3244   b->ht_insert_ct = 0;
3245 
3246   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3247   b->ijonly       = PETSC_FALSE;
3248 
3249   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3250     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3251     if (flg) {
3252       PetscReal fact = 1.39;
3253       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3254       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3255       if (fact <= 1.0) fact = 1.39;
3256       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3257       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3258     }
3259   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3260 
3261 #if defined(PETSC_HAVE_MUMPS)
3262   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3263 #endif
3264   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3265                                      "MatConvert_MPIBAIJ_MPIAdj",
3266                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3267   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3268                                      "MatConvert_MPIBAIJ_MPIAIJ",
3269                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3270   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3271                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3272                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3273   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3274                                      "MatStoreValues_MPIBAIJ",
3275                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3277                                      "MatRetrieveValues_MPIBAIJ",
3278                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3280                                      "MatGetDiagonalBlock_MPIBAIJ",
3281                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3283                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3284                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3286                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3287                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3289                                      "MatDiagonalScaleLocal_MPIBAIJ",
3290                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3291   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3292                                      "MatSetHashTableFactor_MPIBAIJ",
3293                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3294   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3295                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3296                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3297   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3298   PetscFunctionReturn(0);
3299 }
3300 EXTERN_C_END
3301 
3302 /*MC
3303    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3304 
3305    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3306    and MATMPIBAIJ otherwise.
3307 
3308    Options Database Keys:
3309 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3310 
3311   Level: beginner
3312 
3313 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3314 M*/
3315 
3316 #undef __FUNCT__
3317 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3318 /*@C
3319    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3320    (block compressed row).  For good matrix assembly performance
3321    the user should preallocate the matrix storage by setting the parameters
3322    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3323    performance can be increased by more than a factor of 50.
3324 
3325    Collective on Mat
3326 
3327    Input Parameters:
3328 +  A - the matrix
3329 .  bs   - size of block
3330 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3331            submatrix  (same for all local rows)
3332 .  d_nnz - array containing the number of block nonzeros in the various block rows
3333            in the diagonal portion of the local submatrix (possibly different for each block
3334            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3335            set it even if it is zero.
3336 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3337            submatrix (same for all local rows).
3338 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3339            off-diagonal portion of the local submatrix (possibly different for
3340            each block row) or PETSC_NULL.
3341 
3342    If the *_nnz parameter is given then the *_nz parameter is ignored
3343 
3344    Options Database Keys:
3345 +   -mat_block_size - size of the blocks to use
3346 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory, with the table size scaled by <fact>
3347 
3348    Notes:
3349    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3350    then it must be used on all processors that share the object for that argument.
3351 
3352    Storage Information:
3353    For a square global matrix we define each processor's diagonal portion
3354    to be its local rows and the corresponding columns (a square submatrix);
3355    each processor's off-diagonal portion encompasses the remainder of the
3356    local matrix (a rectangular submatrix).
3357 
3358    The user can specify preallocated storage for the diagonal part of
3359    the local submatrix with either d_nz or d_nnz (not both).  Set
3360    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3361    memory allocation.  Likewise, specify preallocated storage for the
3362    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3363 
3364    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3365    the figure below we depict these three local rows and all columns (0-11).
3366 
3367 .vb
3368            0 1 2 3 4 5 6 7 8 9 10 11
3369           -------------------
3370    row 3  |  o o o d d d o o o o o o
3371    row 4  |  o o o d d d o o o o o o
3372    row 5  |  o o o d d d o o o o o o
3373           -------------------
3374 .ve
3375 
3376    Thus, any entries in the d locations are stored in the d (diagonal)
3377    submatrix, and any entries in the o locations are stored in the
3378    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3379    stored simply in the MATSEQBAIJ format for compressed row storage.
3380 
3381    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3382    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3383    In general, for PDE problems in which most nonzeros are near the diagonal,
3384    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3385    or you will get TERRIBLE performance; see the users' manual chapter on
3386    matrices.
3387 
3388    You can call MatGetInfo() to get information on how effective the preallocation was,
3389    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3390    You can also run with the option -info and look for messages with the string
3391    malloc in them to see if additional memory allocation was needed.
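
   Example:
   For the three block rows depicted above (3 d blocks and 9 o blocks per block
   row), exact preallocation is, as a sketch assuming bs=1 and that this process
   owns exactly those three rows:
.vb
   PetscInt d_nnz[3] = {3,3,3}, o_nnz[3] = {9,9,9};
   MatMPIBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);
.ve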
3392 
3393    Level: intermediate
3394 
3395 .keywords: matrix, block, aij, compressed row, sparse, parallel
3396 
3397 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3398 @*/
3399 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3400 {
3401   PetscErrorCode ierr;
3402 
3403   PetscFunctionBegin;
3404   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3405   PetscValidType(B,1);
3406   PetscValidLogicalCollectiveInt(B,bs,2);
3407   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3408   PetscFunctionReturn(0);
3409 }
3410 
3411 #undef __FUNCT__
3412 #define __FUNCT__ "MatCreateBAIJ"
3413 /*@C
3414    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3415    (block compressed row).  For good matrix assembly performance
3416    the user should preallocate the matrix storage by setting the parameters
3417    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3418    performance can be increased by more than a factor of 50.
3419 
3420    Collective on MPI_Comm
3421 
3422    Input Parameters:
3423 +  comm - MPI communicator
3424 .  bs   - size of block
3425 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3426            This value should be the same as the local size used in creating the
3427            y vector for the matrix-vector product y = Ax.
3428 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
3429            This value should be the same as the local size used in creating the
3430            x vector for the matrix-vector product y = Ax.
3431 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3432 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3433 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3434            submatrix  (same for all local rows)
3435 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3436            in the diagonal portion of the local submatrix (possibly different for each block
3437            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3438            and set it even if it is zero.
3439 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3440            submatrix (same for all local rows).
3441 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3442            off-diagonal portion of the local submatrix (possibly different for
3443            each block row) or PETSC_NULL.
3444 
3445    Output Parameter:
3446 .  A - the matrix
3447 
3448    Options Database Keys:
3449 +   -mat_block_size - size of the blocks to use
3450 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory, with the table size scaled by <fact>
3451 
3452    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3453    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3454    [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]
3455 
3456    Notes:
3457    If the *_nnz parameter is given then the *_nz parameter is ignored
3458 
3459    A nonzero block is any block that has 1 or more nonzeros in it
3460 
3461    The user MUST specify either the local or global matrix dimensions
3462    (possibly both).
3463 
3464    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3465    then it must be used on all processors that share the object for that argument.
3466 
3467    Storage Information:
3468    For a square global matrix we define each processor's diagonal portion
3469    to be its local rows and the corresponding columns (a square submatrix);
3470    each processor's off-diagonal portion encompasses the remainder of the
3471    local matrix (a rectangular submatrix).
3472 
3473    The user can specify preallocated storage for the diagonal part of
3474    the local submatrix with either d_nz or d_nnz (not both).  Set
3475    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3476    memory allocation.  Likewise, specify preallocated storage for the
3477    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3478 
3479    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3480    the figure below we depict these three local rows and all columns (0-11).
3481 
3482 .vb
3483            0 1 2 3 4 5 6 7 8 9 10 11
3484           -------------------
3485    row 3  |  o o o d d d o o o o o o
3486    row 4  |  o o o d d d o o o o o o
3487    row 5  |  o o o d d d o o o o o o
3488           -------------------
3489 .ve
3490 
3491    Thus, any entries in the d locations are stored in the d (diagonal)
3492    submatrix, and any entries in the o locations are stored in the
3493    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3494    stored simply in the MATSEQBAIJ format for compressed row storage.
3495 
3496    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3497    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3498    In general, for PDE problems in which most nonzeros are near the diagonal,
3499    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3500    or you will get TERRIBLE performance; see the users' manual chapter on
3501    matrices.
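
   Example:
   A sketch of creating a square matrix with 2x2 blocks, letting PETSc choose the
   local sizes and using rough per-block-row estimates for the preallocation:
.vb
   MatCreateBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve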
3502 
3503    Level: intermediate
3504 
3505 .keywords: matrix, block, aij, compressed row, sparse, parallel
3506 
3507 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3508 @*/
3509 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3510 {
3511   PetscErrorCode ierr;
3512   PetscMPIInt    size;
3513 
3514   PetscFunctionBegin;
3515   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3516   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3517   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3518   if (size > 1) {
3519     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3520     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3521   } else {
3522     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3523     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3524   }
3525   PetscFunctionReturn(0);
3526 }
3527 
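/*
    MatDuplicate_MPIBAIJ - Creates a new matrix with the same parallel layout,
  block sizes, colmap/garray and hash-table settings as matin, duplicating the
  local A and B blocks and copying the numerical values when cpvalues asks for it.
*/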
3528 #undef __FUNCT__
3529 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3530 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3531 {
3532   Mat            mat;
3533   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3534   PetscErrorCode ierr;
3535   PetscInt       len=0;
3536 
3537   PetscFunctionBegin;
3538   *newmat       = 0;
3539   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3540   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3541   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3542   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3543 
3544   mat->factortype   = matin->factortype;
3545   mat->preallocated = PETSC_TRUE;
3546   mat->assembled    = PETSC_TRUE;
3547   mat->insertmode   = NOT_SET_VALUES;
3548 
3549   a      = (Mat_MPIBAIJ*)mat->data;
3550   mat->rmap->bs  = matin->rmap->bs;
3551   a->bs2   = oldmat->bs2;
3552   a->mbs   = oldmat->mbs;
3553   a->nbs   = oldmat->nbs;
3554   a->Mbs   = oldmat->Mbs;
3555   a->Nbs   = oldmat->Nbs;
3556 
3557   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3558   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3559 
3560   a->size         = oldmat->size;
3561   a->rank         = oldmat->rank;
3562   a->donotstash   = oldmat->donotstash;
3563   a->roworiented  = oldmat->roworiented;
3564   a->rowindices   = 0;
3565   a->rowvalues    = 0;
3566   a->getrowactive = PETSC_FALSE;
3567   a->barray       = 0;
3568   a->rstartbs     = oldmat->rstartbs;
3569   a->rendbs       = oldmat->rendbs;
3570   a->cstartbs     = oldmat->cstartbs;
3571   a->cendbs       = oldmat->cendbs;
3572 
3573   /* hash table stuff */
3574   a->ht           = 0;
3575   a->hd           = 0;
3576   a->ht_size      = 0;
3577   a->ht_flag      = oldmat->ht_flag;
3578   a->ht_fact      = oldmat->ht_fact;
3579   a->ht_total_ct  = 0;
3580   a->ht_insert_ct = 0;
3581 
3582   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3583   if (oldmat->colmap) {
3584 #if defined (PETSC_USE_CTABLE)
3585   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3586 #else
3587   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3588   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3589   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3590 #endif
3591   } else a->colmap = 0;
3592 
3593   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3594     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3595     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3596     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3597   } else a->garray = 0;
3598 
3599   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3600   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3601   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3602   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3603   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3604 
3605   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3606   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3607   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3608   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3609   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3610   *newmat = mat;
3611 
3612   PetscFunctionReturn(0);
3613 }
3614 
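/*
    MatLoad_MPIBAIJ - Process 0 reads the binary header, row lengths, column
  indices, and numerical values, shipping each other process the data for its
  contiguous range of rows; identity rows are appended when the global size is
  not divisible by the block size.
*/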
3615 #undef __FUNCT__
3616 #define __FUNCT__ "MatLoad_MPIBAIJ"
3617 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3618 {
3619   PetscErrorCode ierr;
3620   int            fd;
3621   PetscInt       i,nz,j,rstart,rend;
3622   PetscScalar    *vals,*buf;
3623   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3624   MPI_Status     status;
3625   PetscMPIInt    rank,size,maxnz;
3626   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3627   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3628   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3629   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3630   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3631   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3632 
3633   PetscFunctionBegin;
3634   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3635     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3636   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3637 
3638   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3639   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3640   if (!rank) {
3641     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3642     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3643     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3644   }
3645 
3646   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3647 
3648   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3649   M = header[1]; N = header[2];
3650 
3651   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3652   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3653   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3654 
3655   /* If global sizes are set, check if they are consistent with that given in the file */
3656   if (sizesset) {
3657     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3658   }
3659   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3660   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3661 
3662   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3663 
3664   /*
3665      This code adds extra rows to make sure the number of rows is
3666      divisible by the blocksize
3667   */
3668   Mbs        = M/bs;
3669   extra_rows = bs - M + bs*Mbs;
3670   if (extra_rows == bs) extra_rows = 0;
3671   else                  Mbs++;
3672   if (extra_rows && !rank) {
3673     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3674   }
3675 
3676   /* determine ownership of all rows */
3677   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3678     mbs        = Mbs/size + ((Mbs % size) > rank);
3679     m          = mbs*bs;
3680   } else { /* User set */
3681     m          = newmat->rmap->n;
3682     mbs        = m/bs;
3683   }
3684   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3685   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3686 
3687   /* process 0 needs enough room for process with most rows */
3688   if (!rank) {
3689     mmax = rowners[1];
3690     for (i=2; i<=size; i++) {
3691       mmax = PetscMax(mmax,rowners[i]);
3692     }
3693     mmax*=bs;
3694   } else mmax = m;
3695 
3696   rowners[0] = 0;
3697   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3698   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3699   rstart = rowners[rank];
3700   rend   = rowners[rank+1];
3701 
3702   /* distribute row lengths to all processors */
3703   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3704   if (!rank) {
3705     mend = m;
3706     if (size == 1) mend = mend - extra_rows;
3707     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3708     for (j=mend; j<m; j++) locrowlens[j] = 1;
3709     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3710     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3711     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3712     for (j=0; j<m; j++) {
3713       procsnz[0] += locrowlens[j];
3714     }
3715     for (i=1; i<size; i++) {
3716       mend = browners[i+1] - browners[i];
3717       if (i == size-1) mend = mend - extra_rows;
3718       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3719       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3720       /* calculate the number of nonzeros on each processor */
3721       for (j=0; j<browners[i+1]-browners[i]; j++) {
3722         procsnz[i] += rowlengths[j];
3723       }
3724       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3725     }
3726     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3727   } else {
3728     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3729   }
3730 
3731   if (!rank) {
3732     /* determine max buffer needed and allocate it */
3733     maxnz = procsnz[0];
3734     for (i=1; i<size; i++) {
3735       maxnz = PetscMax(maxnz,procsnz[i]);
3736     }
3737     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3738 
3739     /* read in my part of the matrix column indices  */
3740     nz     = procsnz[0];
3741     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3742     mycols = ibuf;
3743     if (size == 1)  nz -= extra_rows;
3744     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3745     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3746 
3747     /* read in each remaining proc's column indices (except the last) and ship them off */
3748     for (i=1; i<size-1; i++) {
3749       nz   = procsnz[i];
3750       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3751       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3752     }
3753     /* read in the stuff for the last proc */
3754     if (size != 1) {
3755       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3756       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3757       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3758       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3759     }
3760     ierr = PetscFree(cols);CHKERRQ(ierr);
3761   } else {
3762     /* determine buffer space needed for message */
3763     nz = 0;
3764     for (i=0; i<m; i++) {
3765       nz += locrowlens[i];
3766     }
3767     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3768     mycols = ibuf;
3769     /* receive message of column indices */
3770     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3771     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3772     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3773   }
3774 
3775   /* loop over local rows, determining number of off diagonal entries */
3776   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3777   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3778   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3779   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3780   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3781   rowcount = 0; nzcount = 0;
3782   for (i=0; i<mbs; i++) {
3783     dcount  = 0;
3784     odcount = 0;
3785     for (j=0; j<bs; j++) {
3786       kmax = locrowlens[rowcount];
3787       for (k=0; k<kmax; k++) {
3788         tmp = mycols[nzcount++]/bs;
3789         if (!mask[tmp]) {
3790           mask[tmp] = 1;
3791           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3792           else masked1[dcount++] = tmp;
3793         }
3794       }
3795       rowcount++;
3796     }
3797 
3798     dlens[i]  = dcount;
3799     odlens[i] = odcount;
3800 
3801     /* zero out the mask elements we set */
3802     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3803     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3804   }
3805 
3806 
3807   if (!sizesset) {
3808     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3809   }
3810   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3811 
3812   if (!rank) {
3813     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3814     /* read in my part of the matrix numerical values  */
3815     nz = procsnz[0];
3816     vals = buf;
3817     mycols = ibuf;
3818     if (size == 1)  nz -= extra_rows;
3819     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3820     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3821 
3822     /* insert into matrix */
3823     jj      = rstart*bs;
3824     for (i=0; i<m; i++) {
3825       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3826       mycols += locrowlens[i];
3827       vals   += locrowlens[i];
3828       jj++;
3829     }
3830     /* read in other processors (except the last one) and ship out */
3831     for (i=1; i<size-1; i++) {
3832       nz   = procsnz[i];
3833       vals = buf;
3834       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3835       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3836     }
3837     /* the last proc */
3838     if (size != 1){
3839       nz   = procsnz[i] - extra_rows;
3840       vals = buf;
3841       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3842       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3843       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3844     }
3845     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3846   } else {
3847     /* receive numeric values */
3848     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3849 
3850     /* receive message of values */
3851     vals   = buf;
3852     mycols = ibuf;
3853     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3854 
3855     /* insert into matrix */
3856     jj      = rstart*bs;
3857     for (i=0; i<m; i++) {
3858       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3859       mycols += locrowlens[i];
3860       vals   += locrowlens[i];
3861       jj++;
3862     }
3863   }
3864   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3865   ierr = PetscFree(buf);CHKERRQ(ierr);
3866   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3867   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3868   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3869   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3870   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3871   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3872 
3873   PetscFunctionReturn(0);
3874 }
3875 
3876 #undef __FUNCT__
3877 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3878 /*@
3879    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3880 
3881    Not Collective, each process can use a different factor
3882 
3883    Input Parameters:
3884 +  mat  - the matrix
3885 -  fact - factor
3886 
3887    Level: advanced
3888 
3889   Notes:
3890    This can also be set by the command line option: -mat_use_hash_table <fact>
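
   For example, to request the default sizing factor explicitly (a sketch; 1.39
   is the default used when -mat_use_hash_table gives no factor):
.vb
   MatMPIBAIJSetHashTableFactor(mat,1.39);
.ve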
3891 
3892 .keywords: matrix, hashtable, factor, HT
3893 
3894 .seealso: MatSetOption()
3895 @*/
3896 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3897 {
3898   PetscErrorCode ierr;
3899 
3900   PetscFunctionBegin;
3901   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3902   PetscFunctionReturn(0);
3903 }
3904 
3905 EXTERN_C_BEGIN
3906 #undef __FUNCT__
3907 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3908 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3909 {
3910   Mat_MPIBAIJ *baij;
3911 
3912   PetscFunctionBegin;
3913   baij = (Mat_MPIBAIJ*)mat->data;
3914   baij->ht_fact = fact;
3915   PetscFunctionReturn(0);
3916 }
3917 EXTERN_C_END
3918 
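/*
    MatMPIBAIJGetSeqBAIJ - Gives access to the local diagonal block Ad, the local
  off-diagonal block Ao, and colmap = garray, which maps Ao's compressed block
  column numbering to global block column numbers.
*/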
3919 #undef __FUNCT__
3920 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3921 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3922 {
3923   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3924   PetscFunctionBegin;
3925   *Ad     = a->A;
3926   *Ao     = a->B;
3927   *colmap = a->garray;
3928   PetscFunctionReturn(0);
3929 }
3930 
3931 /*
3932     Special version for direct calls from Fortran (to eliminate two function call overheads
3933 */
3934 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3935 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3936 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3937 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3938 #endif
3939 
3940 #undef __FUNCT__
3941 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3942 /*@C
3943   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3944 
3945   Collective on Mat
3946 
3947   Input Parameters:
3948 + mat - the matrix
3949 . min - number of input rows
3950 . im - input rows
3951 . nin - number of input columns
3952 . in - input columns
3953 . v - numerical values input
3954 - addvin - INSERT_VALUES or ADD_VALUES
3955 
3956   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3957 
3958   Level: advanced
3959 
3960 .seealso:   MatSetValuesBlocked()
3961 @*/
3962 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3963 {
3964   /* convert input arguments to C version */
3965   Mat             mat = *matin;
3966   PetscInt        m = *min, n = *nin;
3967   InsertMode      addv = *addvin;
3968 
3969   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3970   const MatScalar *value;
3971   MatScalar       *barray=baij->barray;
3972   PetscBool       roworiented = baij->roworiented;
3973   PetscErrorCode  ierr;
3974   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3975   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3976   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3977 
3978   PetscFunctionBegin;
3979   /* tasks normally handled by MatSetValuesBlocked() */
3980   if (mat->insertmode == NOT_SET_VALUES) {
3981     mat->insertmode = addv;
3982   }
3983 #if defined(PETSC_USE_DEBUG)
3984   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3985   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3986 #endif
3987   if (mat->assembled) {
3988     mat->was_assembled = PETSC_TRUE;
3989     mat->assembled     = PETSC_FALSE;
3990   }
3991   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3992 
3993 
3994   if (!barray) {
3995     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3996     baij->barray = barray;
3997   }
3998 
3999   if (roworiented) {
4000     stepval = (n-1)*bs;
4001   } else {
4002     stepval = (m-1)*bs;
4003   }
4004   for (i=0; i<m; i++) {
4005     if (im[i] < 0) continue;
4006 #if defined(PETSC_USE_DEBUG)
4007     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4008 #endif
4009     if (im[i] >= rstart && im[i] < rend) {
4010       row = im[i] - rstart;
4011       for (j=0; j<n; j++) {
4012         /* If there is only one block column (row oriented) or only one block row (column oriented), the block is already contiguous in v and no copy is required */
4013         if ((roworiented) && (n == 1)) {
4014           barray = (MatScalar*)v + i*bs2;
4015         } else if ((!roworiented) && (m == 1)) {
4016           barray = (MatScalar*)v + j*bs2;
4017         } else { /* Here a copy is required */
4018           if (roworiented) {
4019             value = v + i*(stepval+bs)*bs + j*bs;
4020           } else {
4021             value = v + j*(stepval+bs)*bs + i*bs;
4022           }
4023           for (ii=0; ii<bs; ii++,value+=stepval) {
4024             for (jj=0; jj<bs; jj++) {
4025               *barray++  = *value++;
4026             }
4027           }
4028           barray -=bs2;
4029         }
4030 
4031         if (in[j] >= cstart && in[j] < cend){
4032           col  = in[j] - cstart;
4033           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4034         }
4035         else if (in[j] < 0) continue;
4036 #if defined(PETSC_USE_DEBUG)
4037         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4038 #endif
4039         else {
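          /* The global block column in[j] falls outside the diagonal block: translate it to a
             local block column of the off-diagonal matrix B through the colmap (created on first
             use after an assembly).  If the block is not yet present in B and new nonzeros are
             allowed, the matrix is disassembled so the block can be inserted. */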
4040           if (mat->was_assembled) {
4041             if (!baij->colmap) {
4042               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4043             }
4044 
4045 #if defined(PETSC_USE_DEBUG)
4046 #if defined (PETSC_USE_CTABLE)
4047             { PetscInt data;
4048               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4049               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4050             }
4051 #else
4052             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4053 #endif
4054 #endif
4055 #if defined (PETSC_USE_CTABLE)
4056             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4057             col  = (col - 1)/bs;
4058 #else
4059             col = (baij->colmap[in[j]] - 1)/bs;
4060 #endif
4061             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4062               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4063               col =  in[j];
4064             }
4065           }
4066           else col = in[j];
4067           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4068         }
4069       }
4070     } else {
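      /* Block row im[i] is owned by another process: stash the values locally; they are
         communicated to the owning process during MatAssemblyBegin()/MatAssemblyEnd(). */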
4071       if (!baij->donotstash) {
4072         if (roworiented) {
4073           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4074         } else {
4075           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4076         }
4077       }
4078     }
4079   }
4080 
4081   /* task normally handled by MatSetValuesBlocked() */
4082   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4083   PetscFunctionReturn(0);
4084 }
4085 
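/*
   Illustrative sketch (not part of the original source) of the value layout expected by the
   blocked insertion routine above.  With the default row-oriented storage and bs = 2, inserting
   one block row im[0] with two block columns in[0],in[1] passes v as the 2 x 4 dense array

       [ a00 a01 b00 b01 ]
       [ a10 a11 b10 b11 ]

   stored by rows, where the a entries form the block destined for (im[0],in[0]) and the b
   entries the block for (im[0],in[1]); the copy loop above gathers each bs x bs block into
   the contiguous work array barray before handing it to MatSetValuesBlocked_SeqBAIJ().
*/
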
4086 #undef __FUNCT__
4087 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4088 /*@
4089      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4090          in standard CSR format.
4091 
4092    Collective on MPI_Comm
4093 
4094    Input Parameters:
4095 +  comm - MPI communicator
4096 .  bs - the block size, only a block size of 1 is supported
4097 .  m - number of local rows (Cannot be PETSC_DECIDE)
4098 .  n - number of local columns; this should be the same as the local size used in creating the
4099        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4100        calculated if N is given). For square matrices n is almost always m.
4101 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4102 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4103 .   i - row indices
4104 .   j - column indices
4105 -   a - matrix values
4106 
4107    Output Parameter:
4108 .   mat - the matrix
4109 
4110    Level: intermediate
4111 
4112    Notes:
4113        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4114      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4115      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4116 
4117        The i and j indices are 0 based; i[row] gives the offset into the local j (and a) arrays at which the entries of that row begin.
4118 
4119 .keywords: matrix, baij, compressed row, sparse, parallel
4120 
4121 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4122           MATMPIBAIJ, MatCreateBAIJ(), MatCreateMPIAIJWithSplitArrays()
4123 @*/
4124 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4125 {
4126   PetscErrorCode ierr;
4127 
4128 
4129   PetscFunctionBegin;
4130   if (i[0]) {
4131     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4132   }
4133   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4134   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4135   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4136   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4137   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4138   PetscFunctionReturn(0);
4139 }
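
/*
   Illustrative usage sketch (not part of the original source): building a distributed diagonal
   matrix with MatCreateMPIBAIJWithArrays() and bs = 1 (the only block size the routine
   currently supports).  Each process owns two rows; i[] holds the per-row offsets into j[] and
   a[], and j[] holds global column indices, with rstart the global index of the first locally
   owned row (for example 2*rank, with rank obtained from MPI_Comm_rank()).

     Mat         C;
     PetscInt    rstart = 2*rank;
     PetscInt    i[]    = {0,1,2};
     PetscInt    j[]    = {rstart,rstart+1};
     PetscScalar a[]    = {2.0,2.0};

     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/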
4140