xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2dcb2ebcaafef69cb84ce45490ea8f682eb02719)
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

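  /* Merge the two results: vb/idxb hold the row-wise maxima of the off-diagonal
     part B with indices local to B; a->garray[] maps a local block column of B
     back to its global block column, so a winning off-diagonal index is
     converted to a global point column below. */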
  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
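
/*
   A sketch of the encoding: with bs=2 and off-diagonal global block columns
   {3,7} owned locally, garray = {3,7}, so colmap[3] = 1 and colmap[7] = 3
   (the local offset i*bs plus one).  Values are stored shifted by one so that
   0 can mean "this block column is not present locally".
*/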
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
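
/*
   Both macros implement the same insertion scheme: a short binary search
   narrows the block row down to at most three candidate block columns, a
   linear scan then finds (or creates) the bs x bs block, and the scalar is
   written at ap[bs2*_i + bs*cidx + ridx], i.e. each block is stored in
   column-major order.
*/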

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
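  /* stepval is the distance (in scalars) from the end of one row of a bs x bs
     block to the start of that same row in the next block of v, so the copy
     loop below can gather a single input block into the contiguous buffer
     barray. */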
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented) the input block is already contiguous, so a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj]  = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
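/*
   HASH() is multiplicative hashing: the key is multiplied by the golden-ratio
   fraction HASH_KEY, the integer part of the product is dropped, and the
   remaining fraction is scaled up to the table size.  Keys are always formed
   with a +1 shift so that a stored value of 0 marks an empty slot.
*/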
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry was found, so insert the value at the stored address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table and fills it in.
  The table is created only once; if new entries need
  to be added to the matrix then the hash table has to
  be destroyed and recreated.
*/
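
/*
   Collisions are resolved by linear probing with wraparound.  HD[] stores a
   pointer directly to the first scalar of the corresponding bs2-sized block
   inside a->a or b->a, so the _HT MatSetValues routines above can write
   values in place without searching the row structure.
*/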
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate memory for the hash table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average search = %5.2f, max search = %D\n",(!j) ? 0.0 : ((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
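  /* INSERT_VALUES and ADD_VALUES have distinct bit patterns, so the bitwise OR
     across all processes equals (ADD_VALUES|INSERT_VALUES) exactly when the two
     modes were mixed. */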
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive values belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and after MatSetValuesBlocked()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive values belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
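  /* was_assembled is 0 or 1 on each process, so the MPI_PROD reduction below
     yields 1 only if every process is still assembled; a result of 0 means some
     process has disassembled and the rest must follow before reassembling. */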
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
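    /* Note: the whole matrix is copied to the first process entry by entry,
       so this code path is only practical for viewing small matrices. */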
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,count,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get the largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute the number of nonzero blocks in each block row */
  count = 0;
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen  = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for all rows, not just once per block row, since process 0
     does not have the information needed to expand a block row into its point rows. This does require more
     communication, but still not more than the communication needed for the nonzero values */
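  /* For each block row the indices are emitted once, in global point-column
     order (lower B part, then A, then upper B part), and that run is then
     replicated bs-1 times, once for every remaining point row of the block. */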
1148   nzmax = nz; /*  space a largest processor needs */
1149   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1150   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1151   cnt  = 0;
1152   for (i=0; i<a->mbs; i++) {
1153     pcnt = cnt;
1154     for (j=B->i[i]; j<B->i[i+1]; j++) {
1155       if ( (col = garray[B->j[j]]) > cstart) break;
1156       for (l=0; l<bs; l++) {
1157 	column_indices[cnt++] = bs*col+l;
1158       }
1159     }
1160     for (k=A->i[i]; k<A->i[i+1]; k++) {
1161       for (l=0; l<bs; l++) {
1162         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1163       }
1164     }
1165     for (; j<B->i[i+1]; j++) {
1166       for (l=0; l<bs; l++) {
1167         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1168       }
1169     }
1170     len = cnt - pcnt;
1171     for (k=1; k<bs; k++) {
1172       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1173       cnt += len;
1174     }
1175   }
1176   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1177 
1178   /* store the columns to the file */
1179   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1180   if (!rank) {
1181     MPI_Status status;
1182     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1183     for (i=1; i<size; i++) {
1184       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1185       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1186       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1187       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1188     }
1189     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1190   } else {
1191     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1192     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1193     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1194     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1195   }
1196   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1197 
1198   /* load up the numerical values */
1199   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1200   cnt = 0;
1201   for (i=0; i<a->mbs; i++) {
1202     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1203     for (j=B->i[i]; j<B->i[i+1]; j++) {
1204       if ( garray[B->j[j]] > cstart) break;
1205       for (l=0; l<bs; l++) {
1206         for (ll=0; ll<bs; ll++) {
1207 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1208         }
1209       }
1210       cnt += bs;
1211     }
1212     for (k=A->i[i]; k<A->i[i+1]; k++) {
1213       for (l=0; l<bs; l++) {
1214         for (ll=0; ll<bs; ll++) {
1215           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1216         }
1217       }
1218       cnt += bs;
1219     }
1220     for (; j<B->i[i+1]; j++) {
1221       for (l=0; l<bs; l++) {
1222         for (ll=0; ll<bs; ll++) {
1223 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1224         }
1225       }
1226       cnt += bs;
1227     }
1228     cnt += (bs-1)*rlen;
1229   }
1230   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1231 
1232   /* store the column values to the file */
1233   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1234   if (!rank) {
1235     MPI_Status status;
1236     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1239       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1240       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1246     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1247     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1248     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1249   }
1250   ierr = PetscFree(column_values);CHKERRQ(ierr);
1251 
1252   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1253   if (file) {
1254     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatView_MPIBAIJ"
1261 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1262 {
1263   PetscErrorCode ierr;
1264   PetscBool      iascii,isdraw,issocket,isbinary;
1265 
1266   PetscFunctionBegin;
1267   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1268   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1269   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1270   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1271   if (iascii || isdraw || issocket) {
1272     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1273   } else if (isbinary) {
1274     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1275   } else {
1276     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1277   }
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1283 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1284 {
1285   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1286   PetscErrorCode ierr;
1287 
1288   PetscFunctionBegin;
1289 #if defined(PETSC_USE_LOG)
1290   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1291 #endif
1292   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1293   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1294   if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
1295   if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
1296 #if defined (PETSC_USE_CTABLE)
1297   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1298 #else
1299   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1300 #endif
1301   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1302   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1303   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1304   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1305   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1306   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1307   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1308   ierr = PetscFree(baij);CHKERRQ(ierr);
1309 
1310   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatMult_MPIBAIJ"
1323 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1324 {
1325   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1326   PetscErrorCode ierr;
1327   PetscInt       nt;
1328 
1329   PetscFunctionBegin;
1330   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1331   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1332   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1333   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1334   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1336   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
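
/*
   A minimal usage sketch (assuming the standard creation sequence of this
   PETSc revision): MatMult() on an assembled MPIBAIJ matrix dispatches to
   MatMult_MPIBAIJ() above, which overlaps the ghost-value scatter of xx
   with the diagonal-block product before adding the off-diagonal part.

     Mat A; Vec x,y;
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATMPIBAIJ);
     MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
     ... MatSetValuesBlocked() ...; MatAssemblyBegin/End(A,MAT_FINAL_ASSEMBLY);
     MatMult(A,x,y);
*/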
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1343 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1344 {
1345   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1350   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1351   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1352   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1358 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1359 {
1360   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1361   PetscErrorCode ierr;
1362   PetscBool      merged;
1363 
1364   PetscFunctionBegin;
1365   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1366   /* do nondiagonal part */
1367   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1368   if (!merged) {
1369     /* send it on its way */
1370     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1371     /* do local part */
1372     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1373     /* receive remote parts: note this assumes the values are not actually */
1374     /* inserted in yy until the next line */
1375     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1376   } else {
1377     /* do local part */
1378     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1379     /* send it on its way */
1380     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1381     /* values actually were received in the Begin() but we need to call this nop */
1382     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   }
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1389 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1390 {
1391   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1392   PetscErrorCode ierr;
1393 
1394   PetscFunctionBegin;
1395   /* do nondiagonal part */
1396   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1397   /* send it on its way */
1398   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1399   /* do local part */
1400   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1401   /* receive remote parts: note this assumes the values are not actually */
1402   /* inserted in zz until VecScatterEnd() below, which is true for this */
1403   /* implementation but perhaps not for every scatter implementation. */
1404   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 /*
1409   This only works correctly for square matrices where the subblock A->A is the
1410    diagonal block
1411 */
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1414 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1415 {
1416   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1421   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
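
/*
   A minimal usage sketch: v must conform to the row layout of A, and A must
   be square per the restriction above (variable names are illustrative only).

     Vec      d;
     PetscInt m;
     MatGetLocalSize(A,&m,PETSC_NULL);
     VecCreateMPI(comm,m,PETSC_DETERMINE,&d);
     MatGetDiagonal(A,d);
*/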
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatScale_MPIBAIJ"
1427 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1428 {
1429   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1430   PetscErrorCode ierr;
1431 
1432   PetscFunctionBegin;
1433   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1434   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1440 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1441 {
1442   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1443   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1444   PetscErrorCode ierr;
1445   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1446   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1447   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1448 
1449   PetscFunctionBegin;
1450   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1451   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1452   mat->getrowactive = PETSC_TRUE;
1453 
1454   if (!mat->rowvalues && (idx || v)) {
1455     /*
1456         allocate enough space to hold information from the longest row.
1457     */
1458     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1459     PetscInt     max = 1,mbs = mat->mbs,tmp;
1460     for (i=0; i<mbs; i++) {
1461       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1462       if (max < tmp) { max = tmp; }
1463     }
1464     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1465   }
1466   lrow = row - brstart;
1467 
1468   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1469   if (!v)   {pvA = 0; pvB = 0;}
1470   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1471   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1472   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1473   nztot = nzA + nzB;
1474 
1475   cmap  = mat->garray;
1476   if (v  || idx) {
1477     if (nztot) {
1478       /* Sort by increasing column numbers, assuming A and B already sorted */
1479       PetscInt imark = -1;
1480       if (v) {
1481         *v = v_p = mat->rowvalues;
1482         for (i=0; i<nzB; i++) {
1483           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1484           else break;
1485         }
1486         imark = i;
1487         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1488         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1489       }
1490       if (idx) {
1491         *idx = idx_p = mat->rowindices;
1492         if (imark > -1) {
1493           for (i=0; i<imark; i++) {
1494             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1495           }
1496         } else {
1497           for (i=0; i<nzB; i++) {
1498             if (cmap[cworkB[i]/bs] < cstart)
1499               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1500             else break;
1501           }
1502           imark = i;
1503         }
1504         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1505         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1506       }
1507     } else {
1508       if (idx) *idx = 0;
1509       if (v)   *v   = 0;
1510     }
1511   }
1512   *nz = nztot;
1513   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1514   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
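
/*
   A minimal usage sketch of the routine above: only locally owned rows may
   be requested, and each MatGetRow() must be paired with MatRestoreRow()
   before the next row is requested.

     PetscInt          row,rstart,rend,nz;
     const PetscInt    *cols;
     const PetscScalar *vals;
     MatGetOwnershipRange(A,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetRow(A,row,&nz,&cols,&vals);
       ... use the nz entries, sorted by increasing global column ...
       MatRestoreRow(A,row,&nz,&cols,&vals);
     }
*/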
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1520 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1521 {
1522   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1523 
1524   PetscFunctionBegin;
1525   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1526   baij->getrowactive = PETSC_FALSE;
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1532 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1533 {
1534   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1539   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1545 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1546 {
1547   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1548   Mat            A = a->A,B = a->B;
1549   PetscErrorCode ierr;
1550   PetscReal      isend[5],irecv[5];
1551 
1552   PetscFunctionBegin;
1553   info->block_size     = (PetscReal)matin->rmap->bs;
1554   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1555   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1556   isend[3] = info->memory;  isend[4] = info->mallocs;
1557   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1558   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1559   isend[3] += info->memory;  isend[4] += info->mallocs;
1560   if (flag == MAT_LOCAL) {
1561     info->nz_used      = isend[0];
1562     info->nz_allocated = isend[1];
1563     info->nz_unneeded  = isend[2];
1564     info->memory       = isend[3];
1565     info->mallocs      = isend[4];
1566   } else if (flag == MAT_GLOBAL_MAX) {
1567     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1568     info->nz_used      = irecv[0];
1569     info->nz_allocated = irecv[1];
1570     info->nz_unneeded  = irecv[2];
1571     info->memory       = irecv[3];
1572     info->mallocs      = irecv[4];
1573   } else if (flag == MAT_GLOBAL_SUM) {
1574     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1575     info->nz_used      = irecv[0];
1576     info->nz_allocated = irecv[1];
1577     info->nz_unneeded  = irecv[2];
1578     info->memory       = irecv[3];
1579     info->mallocs      = irecv[4];
1580   } else {
1581     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1582   }
1583   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1584   info->fill_ratio_needed = 0;
1585   info->factor_mallocs    = 0;
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1591 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1592 {
1593   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1594   PetscErrorCode ierr;
1595 
1596   PetscFunctionBegin;
1597   switch (op) {
1598   case MAT_NEW_NONZERO_LOCATIONS:
1599   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1600   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1601   case MAT_KEEP_NONZERO_PATTERN:
1602   case MAT_NEW_NONZERO_LOCATION_ERR:
1603     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1604     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1605     break;
1606   case MAT_ROW_ORIENTED:
1607     a->roworiented = flg;
1608     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1609     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1610     break;
1611   case MAT_NEW_DIAGONALS:
1612     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1613     break;
1614   case MAT_IGNORE_OFF_PROC_ENTRIES:
1615     a->donotstash = flg;
1616     break;
1617   case MAT_USE_HASH_TABLE:
1618     a->ht_flag = flg;
1619     break;
1620   case MAT_SYMMETRIC:
1621   case MAT_STRUCTURALLY_SYMMETRIC:
1622   case MAT_HERMITIAN:
1623   case MAT_SYMMETRY_ETERNAL:
1624     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1625     break;
1626   default:
1627     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"Unknown option %d",op);
1628   }
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1634 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1635 {
1636   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1637   Mat_SeqBAIJ    *Aloc;
1638   Mat            B;
1639   PetscErrorCode ierr;
1640   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1641   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1642   MatScalar      *a;
1643 
1644   PetscFunctionBegin;
1645   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1646   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1647     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1648     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1649     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1650     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1651   } else {
1652     B = *matout;
1653   }
1654 
1655   /* copy over the A part */
1656   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1657   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1658   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1659 
1660   for (i=0; i<mbs; i++) {
1661     rvals[0] = bs*(baij->rstartbs + i);
1662     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1663     for (j=ai[i]; j<ai[i+1]; j++) {
1664       col = (baij->cstartbs+aj[j])*bs;
1665       for (k=0; k<bs; k++) {
1666         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1667         col++; a += bs;
1668       }
1669     }
1670   }
1671   /* copy over the B part */
1672   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1673   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1674   for (i=0; i<mbs; i++) {
1675     rvals[0] = bs*(baij->rstartbs + i);
1676     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1677     for (j=ai[i]; j<ai[i+1]; j++) {
1678       col = baij->garray[aj[j]]*bs;
1679       for (k=0; k<bs; k++) {
1680         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1681         col++; a += bs;
1682       }
1683     }
1684   }
1685   ierr = PetscFree(rvals);CHKERRQ(ierr);
1686   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688 
1689   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1690     *matout = B;
1691   } else {
1692     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1693   }
1694   PetscFunctionReturn(0);
1695 }
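
/*
   A minimal usage sketch of the two reuse modes handled above; in-place
   transposition is only permitted for square matrices, per the size check.

     Mat At;
     MatTranspose(A,MAT_INITIAL_MATRIX,&At);     out-of-place into a new matrix
     MatTranspose(A,MAT_REUSE_MATRIX,&A);        in-place, square A only
*/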
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1699 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1700 {
1701   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1702   Mat            a = baij->A,b = baij->B;
1703   PetscErrorCode ierr;
1704   PetscInt       s1,s2,s3;
1705 
1706   PetscFunctionBegin;
1707   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1708   if (rr) {
1709     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1710     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1711     /* Overlap communication with computation. */
1712     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1713   }
1714   if (ll) {
1715     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1716     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1717     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1718   }
1719   /* scale  the diagonal block */
1720   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1721 
1722   if (rr) {
1723     /* Do a scatter end and then right scale the off-diagonal block */
1724     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1726   }
1727 
1728   PetscFunctionReturn(0);
1729 }
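
/*
   A minimal usage sketch: computes mat <- diag(ll) mat diag(rr); either
   vector may be PETSC_NULL to skip that side, matching the checks above.

     MatDiagonalScale(A,l,PETSC_NULL);     left scaling only, l in row layout
     MatDiagonalScale(A,PETSC_NULL,r);     right scaling only, r in column layout
*/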
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1733 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1734 {
1735   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1736   PetscErrorCode    ierr;
1737   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1738   PetscInt          i,*owners = A->rmap->range;
1739   PetscInt          *nprocs,j,idx,nsends,row;
1740   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1741   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1742   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1743   MPI_Comm          comm = ((PetscObject)A)->comm;
1744   MPI_Request       *send_waits,*recv_waits;
1745   MPI_Status        recv_status,*send_status;
1746   const PetscScalar *xx;
1747   PetscScalar       *bb;
1748 #if defined(PETSC_DEBUG)
1749   PetscBool         found = PETSC_FALSE;
1750 #endif
1751 
1752   PetscFunctionBegin;
1753   /*  first count number of contributors to each processor */
1754   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1755   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1756   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1757   j = 0;
1758   for (i=0; i<N; i++) {
1759     if (lastidx > (idx = rows[i])) j = 0;
1760     lastidx = idx;
1761     for (; j<size; j++) {
1762       if (idx >= owners[j] && idx < owners[j+1]) {
1763         nprocs[2*j]++;
1764         nprocs[2*j+1] = 1;
1765         owner[i] = j;
1766 #if defined(PETSC_DEBUG)
1767         found = PETSC_TRUE;
1768 #endif
1769         break;
1770       }
1771     }
1772 #if defined(PETSC_DEBUG)
1773     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1774     found = PETSC_FALSE;
1775 #endif
1776   }
1777   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1778 
1779   if (A->nooffproczerorows) {
1780     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1781     nrecvs = nsends;
1782     nmax   = N;
1783   } else {
1784     /* inform other processors of number of messages and max length*/
1785     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1786   }
1787 
1788   /* post receives:   */
1789   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1790   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1791   for (i=0; i<nrecvs; i++) {
1792     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1793   }
1794 
1795   /* do sends:
1796      1) starts[i] gives the starting index in svalues for stuff going to
1797      the ith processor
1798   */
1799   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1800   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1801   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1802   starts[0]  = 0;
1803   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1804   for (i=0; i<N; i++) {
1805     svalues[starts[owner[i]]++] = rows[i];
1806   }
1807 
1808   starts[0] = 0;
1809   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1810   count = 0;
1811   for (i=0; i<size; i++) {
1812     if (nprocs[2*i+1]) {
1813       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1814     }
1815   }
1816   ierr = PetscFree(starts);CHKERRQ(ierr);
1817 
1818   base = owners[rank];
1819 
1820   /*  wait on receives */
1821   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1822   count  = nrecvs;
1823   slen = 0;
1824   while (count) {
1825     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1826     /* unpack receives into our local space */
1827     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1828     source[imdex]  = recv_status.MPI_SOURCE;
1829     lens[imdex]    = n;
1830     slen          += n;
1831     count--;
1832   }
1833   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1834 
1835   /* move the data into the send scatter */
1836   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1837   count = 0;
1838   for (i=0; i<nrecvs; i++) {
1839     values = rvalues + i*nmax;
1840     for (j=0; j<lens[i]; j++) {
1841       lrows[count++] = values[j] - base;
1842     }
1843   }
1844   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1845   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1846   ierr = PetscFree(owner);CHKERRQ(ierr);
1847   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1848 
1849   /* fix right hand side if needed */
1850   if (x && b) {
1851     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1852     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1853     for (i=0; i<slen; i++) {
1854       bb[lrows[i]] = diag*xx[lrows[i]];
1855     }
1856     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1857     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1858   }
1859 
1860   /* actually zap the local rows */
1861   /*
1862         Zero the required rows. If the "diagonal block" of the matrix
1863      is square and the user wishes to set the diagonal, we use separate
1864      code so that MatSetValues() is not called for each diagonal entry;
1865      that would allocate new memory each time, triggering many mallocs
1866      and slowing things down.
1867   */
1868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1869   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1871     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1872   } else if (diag != 0.0) {
1873     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1874     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1875        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1876     for (i=0; i<slen; i++) {
1877       row  = lrows[i] + rstart_bs;
1878       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1879     }
1880     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1881     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1882   } else {
1883     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1884   }
1885 
1886   ierr = PetscFree(lrows);CHKERRQ(ierr);
1887 
1888   /* wait on sends */
1889   if (nsends) {
1890     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1891     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1892     ierr = PetscFree(send_status);CHKERRQ(ierr);
1893   }
1894   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1895   ierr = PetscFree(svalues);CHKERRQ(ierr);
1896 
1897   PetscFunctionReturn(0);
1898 }
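
/*
   A minimal usage sketch (assuming the MatZeroRows() calling sequence of this
   revision, which accepts optional solution/rhs vectors): rows may be owned
   by any process, since the rendezvous above routes each row to its owner.
   For a typical Dirichlet condition with boundary values already stored in x:

     PetscInt rows[] = { ...global row indices, need not be local... };
     MatZeroRows(A,nrows,rows,1.0,x,b);      also sets b[rows[i]] = 1.0*x[rows[i]]
*/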
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1902 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1903 {
1904   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1905   PetscErrorCode ierr;
1906 
1907   PetscFunctionBegin;
1908   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatEqual_MPIBAIJ"
1916 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1917 {
1918   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1919   Mat            a,b,c,d;
1920   PetscBool      flg;
1921   PetscErrorCode ierr;
1922 
1923   PetscFunctionBegin;
1924   a = matA->A; b = matA->B;
1925   c = matB->A; d = matB->B;
1926 
1927   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1928   if (flg) {
1929     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1930   }
1931   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatCopy_MPIBAIJ"
1937 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1938 {
1939   PetscErrorCode ierr;
1940   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1941   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1942 
1943   PetscFunctionBegin;
1944   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1945   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1946     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1947   } else {
1948     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1949     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1950   }
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1956 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1957 {
1958   PetscErrorCode ierr;
1959 
1960   PetscFunctionBegin;
1961   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 #undef __FUNCT__
1966 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1967 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1968 {
1969   PetscErrorCode ierr;
1970   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1971   PetscBLASInt   bnz,one=1;
1972   Mat_SeqBAIJ    *x,*y;
1973 
1974   PetscFunctionBegin;
1975   if (str == SAME_NONZERO_PATTERN) {
1976     PetscScalar alpha = a;
1977     x = (Mat_SeqBAIJ *)xx->A->data;
1978     y = (Mat_SeqBAIJ *)yy->A->data;
1979     bnz = PetscBLASIntCast(x->nz);
1980     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1981     x = (Mat_SeqBAIJ *)xx->B->data;
1982     y = (Mat_SeqBAIJ *)yy->B->data;
1983     bnz = PetscBLASIntCast(x->nz);
1984     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1985   } else {
1986     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1987   }
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 #undef __FUNCT__
1992 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1993 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1994 {
1995   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1996   PetscInt rbs,cbs;
1997   PetscErrorCode ierr;
1998 
1999   PetscFunctionBegin;
2000   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2001   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2002   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2003   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2004   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d but row block size is already %d",bs,rbs);
2005   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d but column block size is already %d",bs,cbs);
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2011 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2012 {
2013   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2014   PetscErrorCode ierr;
2015 
2016   PetscFunctionBegin;
2017   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2018   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2024 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2025 {
2026   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2027   PetscErrorCode ierr;
2028 
2029   PetscFunctionBegin;
2030   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2031   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2037 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2038 {
2039   PetscErrorCode ierr;
2040   IS             iscol_local;
2041   PetscInt       csize;
2042 
2043   PetscFunctionBegin;
2044   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2045   if (call == MAT_REUSE_MATRIX) {
2046     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2047     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2048   } else {
2049     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2050   }
2051   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2052   if (call == MAT_INITIAL_MATRIX) {
2053     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2054     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
2055   }
2056   PetscFunctionReturn(0);
2057 }
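
/*
   A minimal usage sketch: iscol is gathered onto all processes above, so the
   call is collective; MAT_REUSE_MATRIX requires the matrix produced by an
   earlier MAT_INITIAL_MATRIX call (names here are illustrative only).

     IS  rows,cols;
     Mat S;
     ISCreateStride(comm,nloc,first,1,&rows);
     ISCreateStride(comm,nloc,first,1,&cols);
     MatGetSubMatrix(A,rows,cols,MAT_INITIAL_MATRIX,&S);
     ...
     MatGetSubMatrix(A,rows,cols,MAT_REUSE_MATRIX,&S);
*/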
2058 
2059 #undef __FUNCT__
2060 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2061 /*
2062     Not great since it makes two copies of the submatrix: first a SeqBAIJ
2063   locally, then the end result by concatenating the local matrices.
2064   Writing it directly would look much like MatGetSubMatrices_MPIBAIJ().
2065 */
2066 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2067 {
2068   PetscErrorCode ierr;
2069   PetscMPIInt    rank,size;
2070   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2071   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2072   Mat            *local,M,Mreuse;
2073   MatScalar      *vwork,*aa;
2074   MPI_Comm       comm = ((PetscObject)mat)->comm;
2075   Mat_SeqBAIJ    *aij;
2076 
2077 
2078   PetscFunctionBegin;
2079   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2081 
2082   if (call ==  MAT_REUSE_MATRIX) {
2083     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2084     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2085     local = &Mreuse;
2086     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2087   } else {
2088     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2089     Mreuse = *local;
2090     ierr   = PetscFree(local);CHKERRQ(ierr);
2091   }
2092 
2093   /*
2094       m - number of local rows
2095       n - number of columns (same on all processors)
2096       rstart - first row in new global matrix generated
2097   */
2098   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2099   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2100   m    = m/bs;
2101   n    = n/bs;
2102 
2103   if (call == MAT_INITIAL_MATRIX) {
2104     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2105     ii  = aij->i;
2106     jj  = aij->j;
2107 
2108     /*
2109         Determine the number of non-zeros in the diagonal and off-diagonal
2110         portions of the matrix in order to do correct preallocation
2111     */
2112 
2113     /* first get start and end of "diagonal" columns */
2114     if (csize == PETSC_DECIDE) {
2115       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2116       if (mglobal == n*bs) { /* square matrix */
2117         nlocal = m;
2118       } else {
2119         nlocal = n/size + ((n % size) > rank);
2120       }
2121     } else {
2122       nlocal = csize/bs;
2123     }
2124     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2125     rstart = rend - nlocal;
2126     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2127 
2128     /* next, compute all the lengths */
2129     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2130     olens = dlens + m;
2131     for (i=0; i<m; i++) {
2132       jend = ii[i+1] - ii[i];
2133       olen = 0;
2134       dlen = 0;
2135       for (j=0; j<jend; j++) {
2136         if (*jj < rstart || *jj >= rend) olen++;
2137         else dlen++;
2138         jj++;
2139       }
2140       olens[i] = olen;
2141       dlens[i] = dlen;
2142     }
2143     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2144     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2145     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2146     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2147     ierr = PetscFree(dlens);CHKERRQ(ierr);
2148   } else {
2149     PetscInt ml,nl;
2150 
2151     M = *newmat;
2152     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2153     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2154     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2155     /*
2156          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ()
2157        below directly, rather than the slower generic MatSetValues().
2158     */
2159     M->was_assembled = PETSC_TRUE;
2160     M->assembled     = PETSC_FALSE;
2161   }
2162   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2163   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2164   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2165   ii  = aij->i;
2166   jj  = aij->j;
2167   aa  = aij->a;
2168   for (i=0; i<m; i++) {
2169     row   = rstart/bs + i;
2170     nz    = ii[i+1] - ii[i];
2171     cwork = jj;     jj += nz;
2172     vwork = aa;     aa += nz;
2173     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2174   }
2175 
2176   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2177   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2178   *newmat = M;
2179 
2180   /* save submatrix used in processor for next request */
2181   if (call ==  MAT_INITIAL_MATRIX) {
2182     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2183     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2184   }
2185 
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatPermute_MPIBAIJ"
2191 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2192 {
2193   MPI_Comm       comm,pcomm;
2194   PetscInt       first,local_size,nrows;
2195   const PetscInt *rows;
2196   PetscMPIInt    size;
2197   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2198   PetscErrorCode ierr;
2199 
2200   PetscFunctionBegin;
2201   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2202   /* make a collective version of 'rowp' */
2203   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2204   if (pcomm==comm) {
2205     crowp = rowp;
2206   } else {
2207     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2208     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2209     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2210     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2211   }
2212   /* collect the global row permutation and invert it */
2213   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2214   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2215   if (pcomm!=comm) {
2216     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2217   }
2218   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2219   /* get the local target indices */
2220   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2221   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2222   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2223   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2224   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2225   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2226   /* the column permutation is so much easier;
2227      make a local version of 'colp' and invert it */
2228   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2229   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2230   if (size==1) {
2231     lcolp = colp;
2232   } else {
2233     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2234     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2235     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2236   }
2237   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2238   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2239   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2240   if (size>1) {
2241     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2242     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2243   }
2244   /* now we just get the submatrix */
2245   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2246   /* clean up */
2247   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2248   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2249   PetscFunctionReturn(0);
2250 }
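
/*
   A minimal usage sketch: both index sets must be marked as permutations,
   which the routine above relies on when gathering and inverting them.

     IS  rowp,colp;
     Mat B;
     ISSetPermutation(rowp);
     ISSetPermutation(colp);
     MatPermute(A,rowp,colp,&B);
*/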
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2254 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2255 {
2256   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2257   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2258 
2259   PetscFunctionBegin;
2260   if (nghosts) { *nghosts = B->nbs;}
2261   if (ghosts) {*ghosts = baij->garray;}
2262   PetscFunctionReturn(0);
2263 }
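
/*
   A minimal usage sketch: returns a borrowed pointer to the (block) global
   indices needed from other processes (baij->garray); no copy is made, so
   the caller must not free or modify the array.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     MatGetGhosts(A,&nghosts,&ghosts);
*/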
2264 
2265 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2269 /*
2270     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2271 */
2272 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2273 {
2274   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2275   PetscErrorCode        ierr;
2276   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2277   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2278   const PetscInt        *is;
2279   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2280   PetscInt              *rowhit,M,cstart,cend,colb;
2281   PetscInt              *columnsforrow,l;
2282   IS                    *isa;
2283   PetscBool              done,flg;
2284   ISLocalToGlobalMapping map = mat->bmapping;
2285   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL),ctype=c->ctype;
2286 
2287   PetscFunctionBegin;
2288   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2289   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2290 
2291   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2292   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2293   M                = mat->rmap->n/bs;
2294   cstart           = mat->cmap->rstart/bs;
2295   cend             = mat->cmap->rend/bs;
2296   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2297   c->N             = mat->cmap->N/bs;
2298   c->m             = mat->rmap->n/bs;
2299   c->rstart        = mat->rmap->rstart/bs;
2300 
2301   c->ncolors       = nis;
2302   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2303   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2304   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2305   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2306   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2307   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2308 
2309   /* Allow access to data structures of local part of matrix */
2310   if (!baij->colmap) {
2311     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2312   }
2313   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2314   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2315 
2316   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2317   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2318 
2319   for (i=0; i<nis; i++) {
2320     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2321     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2322     c->ncolumns[i] = n;
2323     if (n) {
2324       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2325       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2326       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2327     } else {
2328       c->columns[i]  = 0;
2329     }
2330 
2331     if (ctype == IS_COLORING_GLOBAL){
2332       /* Determine the total (parallel) number of columns of this color */
2333       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2334       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2335 
2336       nn   = PetscMPIIntCast(n);
2337       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2338       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2339       if (!nctot) {
2340         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2341       }
2342 
2343       disp[0] = 0;
2344       for (j=1; j<size; j++) {
2345         disp[j] = disp[j-1] + ncolsonproc[j-1];
2346       }
2347 
2348       /* Get complete list of columns for color on each processor */
2349       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2350       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2351       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2352     } else if (ctype == IS_COLORING_GHOSTED){
2353       /* Determine local number of columns of this color on this process, including ghost points */
2354       nctot = n;
2355       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2356       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2357     } else {
2358       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2359     }
2360 
2361     /*
2362        Mark all rows affected by these columns
2363     */
2364     /* Temporary option to allow for debugging/testing */
2365     flg  = PETSC_FALSE;
2366     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2367     if (!flg) {/*-----------------------------------------------------------------------------*/
2368       /* crude, fast version */
2369       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2370       /* loop over columns*/
2371       for (j=0; j<nctot; j++) {
2372         if (ctype == IS_COLORING_GHOSTED) {
2373           col = ltog[cols[j]];
2374         } else {
2375           col  = cols[j];
2376         }
2377         if (col >= cstart && col < cend) {
2378           /* column is in diagonal block of matrix */
2379           rows = A_cj + A_ci[col-cstart];
2380           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2381         } else {
2382 #if defined (PETSC_USE_CTABLE)
2383           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2384           colb--;
2385 #else
2386           colb = baij->colmap[col] - 1;
2387 #endif
2388           if (colb == -1) {
2389             m = 0;
2390           } else {
2391             colb = colb/bs;
2392             rows = B_cj + B_ci[colb];
2393             m    = B_ci[colb+1] - B_ci[colb];
2394           }
2395         }
2396         /* loop over the rows of this column, marking them in rowhit */
2397         for (k=0; k<m; k++) {
2398           rowhit[*rows++] = col + 1;
2399         }
2400       }
2401 
2402       /* count the number of hits */
2403       nrows = 0;
2404       for (j=0; j<M; j++) {
2405         if (rowhit[j]) nrows++;
2406       }
2407       c->nrows[i]         = nrows;
2408       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2409       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2410       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2411       nrows = 0;
2412       for (j=0; j<M; j++) {
2413         if (rowhit[j]) {
2414           c->rows[i][nrows]           = j;
2415           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2416           nrows++;
2417         }
2418       }
2419     } else {/*-------------------------------------------------------------------------------*/
2420       /* slow version, using rowhit as a linked list */
2421       PetscInt currentcol,fm,mfm;
2422       rowhit[M] = M;
2423       nrows     = 0;
2424       /* loop over columns*/
2425       for (j=0; j<nctot; j++) {
2426         if (ctype == IS_COLORING_GHOSTED) {
2427           col = ltog[cols[j]];
2428         } else {
2429           col  = cols[j];
2430         }
2431         if (col >= cstart && col < cend) {
2432           /* column is in diagonal block of matrix */
2433           rows = A_cj + A_ci[col-cstart];
2434           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2435         } else {
2436 #if defined (PETSC_USE_CTABLE)
2437           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2438           colb --;
2439 #else
2440           colb = baij->colmap[col] - 1;
2441 #endif
2442           if (colb == -1) {
2443             m = 0;
2444           } else {
2445             colb = colb/bs;
2446             rows = B_cj + B_ci[colb];
2447             m    = B_ci[colb+1] - B_ci[colb];
2448           }
2449         }
2450 
2451         /* loop over the rows of this column, marking them in rowhit */
2452         fm    = M; /* fm points to first entry in linked list */
2453         for (k=0; k<m; k++) {
2454           currentcol = *rows++;
2455           /* is it already in the list? */
2456           do {
2457             mfm  = fm;
2458             fm   = rowhit[fm];
2459           } while (fm < currentcol);
2460           /* not in list so add it */
2461           if (fm != currentcol) {
2462             nrows++;
2463             columnsforrow[currentcol] = col;
2464             /* next three lines insert new entry into linked list */
2465             rowhit[mfm]               = currentcol;
2466             rowhit[currentcol]        = fm;
2467             fm                        = currentcol;
2468             /* fm points to present position in list since we know the columns are sorted */
2469           } else {
2470             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2471           }
2472         }
2473       }
2474       c->nrows[i]         = nrows;
2475       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2476       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2477       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2478       /* now store the linked list of rows into c->rows[i] */
2479       nrows = 0;
2480       fm    = rowhit[M];
2481       do {
2482         c->rows[i][nrows]            = fm;
2483         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2484         fm                           = rowhit[fm];
2485       } while (fm < M);
2486     } /* ---------------------------------------------------------------------------------------*/
2487     ierr = PetscFree(cols);CHKERRQ(ierr);
2488   }
2489 
2490   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2491   /*
2492        vscale will contain the "diagonal" on-processor scalings followed by the
2493      off-processor scalings
2494   */
2494   if (ctype == IS_COLORING_GLOBAL) {
2495     PetscInt *garray;
2496     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2497     for (i=0; i<baij->B->cmap->n/bs; i++) {
2498       for (j=0; j<bs; j++) {
2499         garray[i*bs+j] = bs*baij->garray[i]+j;
2500       }
2501     }
2502     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2503     ierr = PetscFree(garray);CHKERRQ(ierr);
2504     CHKMEMQ;
2505     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2506     for (k=0; k<c->ncolors; k++) {
2507       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2508       for (l=0; l<c->nrows[k]; l++) {
2509         col = c->columnsforrow[k][l];
2510         if (col >= cstart && col < cend) {
2511           /* column is in diagonal block of matrix */
2512           colb = col - cstart;
2513         } else {
2514           /* column  is in "off-processor" part */
2515 #if defined (PETSC_USE_CTABLE)
2516           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2517           colb --;
2518 #else
2519           colb = baij->colmap[col] - 1;
2520 #endif
2521           colb = colb/bs;
2522           colb += cend - cstart;
2523         }
2524         c->vscaleforrow[k][l] = colb;
2525       }
2526     }
2527   } else if (ctype == IS_COLORING_GHOSTED) {
2528     /* Get gtol mapping */
2529     PetscInt N = mat->cmap->N, *gtol;
2530     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2531     for (i=0; i<N; i++) gtol[i] = -1;
2532     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2533 
2534     c->vscale = 0; /* will be created in MatFDColoringApply() */
2535     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2536     for (k=0; k<c->ncolors; k++) {
2537       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2538       for (l=0; l<c->nrows[k]; l++) {
2539         col = c->columnsforrow[k][l];      /* global column index */
2540         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2541       }
2542     }
2543     ierr = PetscFree(gtol);CHKERRQ(ierr);
2544   }
2545   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2546 
2547   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2548   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2549   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2550   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2551   CHKMEMQ;
2552   PetscFunctionReturn(0);
2553 }
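
/*
   A minimal usage sketch (assuming the coloring identifiers of this
   revision): the ISColoring consumed above usually comes from the matrix
   nonzero structure, after which the context drives finite-difference
   Jacobian evaluation.

     ISColoring    iscoloring;
     MatFDColoring fdcoloring;
     MatGetColoring(J,MATCOLORING_SL,&iscoloring);
     MatFDColoringCreate(J,iscoloring,&fdcoloring);
     ISColoringDestroy(iscoloring);
*/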
2554 
2555 #undef __FUNCT__
2556 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2557 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2558 {
2559   Mat            B;
2560   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2561   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2562   Mat_SeqAIJ     *b;
2563   PetscErrorCode ierr;
2564   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2565   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2566   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2567 
2568   PetscFunctionBegin;
2569   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2570   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2571 
2572   /* ----------------------------------------------------------------
2573      Tell every processor the number of nonzeros per row
2574   */
2575   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2576   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2577     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2578   }
2579   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2580   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2581   displs     = recvcounts + size;
2582   for (i=0; i<size; i++) {
2583     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2584     displs[i]     = A->rmap->range[i]/bs;
2585   }
2586 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2587   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2588 #else
2589   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2590 #endif
2591   /* ---------------------------------------------------------------
2592      Create the sequential matrix of the same type as the local block diagonal
2593   */
2594   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2595   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2596   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2597   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2598   b = (Mat_SeqAIJ *)B->data;
2599 
2600   /*--------------------------------------------------------------------
2601     Copy my part of matrix column indices over
2602   */
2603   sendcount  = ad->nz + bd->nz;
2604   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2605   a_jsendbuf = ad->j;
2606   b_jsendbuf = bd->j;
2607   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2608   cnt        = 0;
2609   for (i=0; i<n; i++) {
2610 
2611     /* put in lower diagonal portion */
2612     m = bd->i[i+1] - bd->i[i];
2613     while (m > 0) {
2614       /* is it above diagonal (in bd (compressed) numbering) */
2615       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2616       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2617       m--;
2618     }
2619 
2620     /* put in diagonal portion */
2621     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2622       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2623     }
2624 
2625     /* put in upper diagonal portion */
2626     while (m-- > 0) {
2627       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2628     }
2629   }
2630   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2631 
2632   /*--------------------------------------------------------------------
2633     Gather all column indices to all processors
2634   */
2635   for (i=0; i<size; i++) {
2636     recvcounts[i] = 0;
2637     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2638       recvcounts[i] += lens[j];
2639     }
2640   }
2641   displs[0]  = 0;
2642   for (i=1; i<size; i++) {
2643     displs[i] = displs[i-1] + recvcounts[i-1];
2644   }
2645 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2646   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2647 #else
2648   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2649 #endif
2650   /*--------------------------------------------------------------------
2651     Assemble the matrix into usable form (note numerical values not yet set)
2652   */
2653   /* set the b->ilen (length of each row) values */
2654   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2655   /* set the b->i indices */
2656   b->i[0] = 0;
2657   for (i=1; i<=A->rmap->N/bs; i++) {
2658     b->i[i] = b->i[i-1] + lens[i-1];
2659   }
2660   ierr = PetscFree(lens);CHKERRQ(ierr);
2661   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2662   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2663   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2664 
2665   if (A->symmetric){
2666     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2667   } else if (A->hermitian) {
2668     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2669   } else if (A->structurally_symmetric) {
2670     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2671   }
2672   *newmat = B;
2673   PetscFunctionReturn(0);
2674 }
2675 
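/*
   MatSOR_MPIBAIJ - parallel SOR.  SOR_APPLY_UPPER is handled purely locally; for the
   SOR_LOCAL_* variants each parallel iteration scatters the latest ghost values of xx,
   forms the updated right-hand side bb1 = bb - B*x_ghost using the off-diagonal block B,
   and then runs the requested SOR sweep on the diagonal block A.
*/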
2676 #undef __FUNCT__
2677 #define __FUNCT__ "MatSOR_MPIBAIJ"
2678 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2679 {
2680   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2681   PetscErrorCode ierr;
2682   Vec            bb1 = 0;
2683 
2684   PetscFunctionBegin;
2685   if (flag == SOR_APPLY_UPPER) {
2686     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2687     PetscFunctionReturn(0);
2688   }
2689 
2690   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2691     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2692   }
2693 
2694   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2695     if (flag & SOR_ZERO_INITIAL_GUESS) {
2696       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2697       its--;
2698     }
2699 
2700     while (its--) {
2701       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2702       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2703 
2704       /* update rhs: bb1 = bb - B*x */
2705       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2706       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2707 
2708       /* local sweep */
2709       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2710     }
2711   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2712     if (flag & SOR_ZERO_INITIAL_GUESS) {
2713       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2714       its--;
2715     }
2716     while (its--) {
2717       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2718       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2719 
2720       /* update rhs: bb1 = bb - B*x */
2721       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2722       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2723 
2724       /* local sweep */
2725       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2726     }
2727   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2728     if (flag & SOR_ZERO_INITIAL_GUESS) {
2729       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2730       its--;
2731     }
2732     while (its--) {
2733       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2734       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2735 
2736       /* update rhs: bb1 = bb - B*x */
2737       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2738       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2739 
2740       /* local sweep */
2741       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2742     }
2743   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2744 
2745   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2750 
2751 
2752 /* -------------------------------------------------------------------*/
2753 static struct _MatOps MatOps_Values = {
2754        MatSetValues_MPIBAIJ,
2755        MatGetRow_MPIBAIJ,
2756        MatRestoreRow_MPIBAIJ,
2757        MatMult_MPIBAIJ,
2758 /* 4*/ MatMultAdd_MPIBAIJ,
2759        MatMultTranspose_MPIBAIJ,
2760        MatMultTransposeAdd_MPIBAIJ,
2761        0,
2762        0,
2763        0,
2764 /*10*/ 0,
2765        0,
2766        0,
2767        MatSOR_MPIBAIJ,
2768        MatTranspose_MPIBAIJ,
2769 /*15*/ MatGetInfo_MPIBAIJ,
2770        MatEqual_MPIBAIJ,
2771        MatGetDiagonal_MPIBAIJ,
2772        MatDiagonalScale_MPIBAIJ,
2773        MatNorm_MPIBAIJ,
2774 /*20*/ MatAssemblyBegin_MPIBAIJ,
2775        MatAssemblyEnd_MPIBAIJ,
2776        MatSetOption_MPIBAIJ,
2777        MatZeroEntries_MPIBAIJ,
2778 /*24*/ MatZeroRows_MPIBAIJ,
2779        0,
2780        0,
2781        0,
2782        0,
2783 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2784        0,
2785        0,
2786        0,
2787        0,
2788 /*34*/ MatDuplicate_MPIBAIJ,
2789        0,
2790        0,
2791        0,
2792        0,
2793 /*39*/ MatAXPY_MPIBAIJ,
2794        MatGetSubMatrices_MPIBAIJ,
2795        MatIncreaseOverlap_MPIBAIJ,
2796        MatGetValues_MPIBAIJ,
2797        MatCopy_MPIBAIJ,
2798 /*44*/ 0,
2799        MatScale_MPIBAIJ,
2800        0,
2801        0,
2802        0,
2803 /*49*/ MatSetBlockSize_MPIBAIJ,
2804        0,
2805        0,
2806        0,
2807        0,
2808 /*54*/ MatFDColoringCreate_MPIBAIJ,
2809        0,
2810        MatSetUnfactored_MPIBAIJ,
2811        MatPermute_MPIBAIJ,
2812        MatSetValuesBlocked_MPIBAIJ,
2813 /*59*/ MatGetSubMatrix_MPIBAIJ,
2814        MatDestroy_MPIBAIJ,
2815        MatView_MPIBAIJ,
2816        0,
2817        0,
2818 /*64*/ 0,
2819        0,
2820        0,
2821        0,
2822        0,
2823 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2824        0,
2825        0,
2826        0,
2827        0,
2828 /*74*/ 0,
2829        MatFDColoringApply_BAIJ,
2830        0,
2831        0,
2832        0,
2833 /*79*/ 0,
2834        0,
2835        0,
2836        0,
2837        MatLoad_MPIBAIJ,
2838 /*84*/ 0,
2839        0,
2840        0,
2841        0,
2842        0,
2843 /*89*/ 0,
2844        0,
2845        0,
2846        0,
2847        0,
2848 /*94*/ 0,
2849        0,
2850        0,
2851        0,
2852        0,
2853 /*99*/ 0,
2854        0,
2855        0,
2856        0,
2857        0,
2858 /*104*/0,
2859        MatRealPart_MPIBAIJ,
2860        MatImaginaryPart_MPIBAIJ,
2861        0,
2862        0,
2863 /*109*/0,
2864        0,
2865        0,
2866        0,
2867        0,
2868 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2869        0,
2870        MatGetGhosts_MPIBAIJ,
2871        0,
2872        0,
2873 /*119*/0,
2874        0,
2875        0,
2876        0
2877 };
2878 
2879 EXTERN_C_BEGIN
2880 #undef __FUNCT__
2881 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
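/* Returns the local diagonal block of the parallel matrix directly; no copy is made, hence *iscopy is set to PETSC_FALSE */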
2882 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *a)
2883 {
2884   PetscFunctionBegin;
2885   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2886   *iscopy = PETSC_FALSE;
2887   PetscFunctionReturn(0);
2888 }
2889 EXTERN_C_END
2890 
2891 EXTERN_C_BEGIN
2892 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2893 EXTERN_C_END
2894 
2895 EXTERN_C_BEGIN
2896 #undef __FUNCT__
2897 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2898 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2899 {
2900   PetscInt       m,rstart,cstart,cend;
2901   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2902   const PetscInt *JJ=0;
2903   PetscScalar    *values=0;
2904   PetscErrorCode ierr;
2905 
2906   PetscFunctionBegin;
2907 
2908   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2909   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2910   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2911   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2912   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2913   m      = B->rmap->n/bs;
2914   rstart = B->rmap->rstart/bs;
2915   cstart = B->cmap->rstart/bs;
2916   cend   = B->cmap->rend/bs;
2917 
2918   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2919   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2920   for (i=0; i<m; i++) {
2921     nz = ii[i+1] - ii[i];
2922     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2923     nz_max = PetscMax(nz_max,nz);
2924     JJ  = jj + ii[i];
2925     for (j=0; j<nz; j++) {
2926       if (*JJ >= cstart) break;
2927       JJ++;
2928     }
2929     d = 0;
2930     for (; j<nz; j++) {
2931       if (*JJ++ >= cend) break;
2932       d++;
2933     }
2934     d_nnz[i] = d;
2935     o_nnz[i] = nz - d;
2936   }
2937   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2938   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2939 
2940   values = (PetscScalar*)V;
2941   if (!values) {
2942     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2943     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2944   }
2945   for (i=0; i<m; i++) {
2946     PetscInt          row    = i + rstart;
2947     PetscInt          ncols  = ii[i+1] - ii[i];
2948     const PetscInt    *icols = jj + ii[i];
2949     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2950     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2951   }
2952 
2953   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2954   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2955   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2956 
2957   PetscFunctionReturn(0);
2958 }
2959 EXTERN_C_END
2960 
2961 #undef __FUNCT__
2962 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2963 /*@C
2964    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2965    (block compressed row).
2966 
2967    Collective on MPI_Comm
2968 
2969    Input Parameters:
2970 +  A - the matrix
2971 .  bs - the block size
2972 .  i - the indices into j for the start of each local row (starts with zero)
2973 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2974 -  v - optional values in the matrix
2975 
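   Example (a minimal sketch, assuming A was already created with type MATMPIBAIJ and its
   sizes set; the block layout and values below are purely illustrative):
.vb
      PetscInt    bs = 2, ii[] = {0,2,3}, jj[] = {0,1,1};  /* two local block rows       */
      PetscScalar vv[3*4] = {0};                           /* bs*bs = 4 values per block */
      ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,vv);CHKERRQ(ierr);
.ve
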
2976    Level: developer
2977 
2978 .keywords: matrix, baij, compressed row, sparse, parallel
2979 
2980 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2981 @*/
2982 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2983 {
2984   PetscErrorCode ierr;
2985 
2986   PetscFunctionBegin;
2987   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 EXTERN_C_BEGIN
2992 #undef __FUNCT__
2993 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2994 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2995 {
2996   Mat_MPIBAIJ    *b;
2997   PetscErrorCode ierr;
2998   PetscInt       i, newbs = PetscAbs(bs);
2999 
3000   PetscFunctionBegin;
3001   if (bs < 0) {
3002     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3003       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3004     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3005     bs   = PetscAbs(bs);
3006   }
3007   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3008   bs = newbs;
3009 
3010 
3011   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3012   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3013   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3014   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3015   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3016 
3017   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3018   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3019   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3020   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3021 
3022   if (d_nnz) {
3023     for (i=0; i<B->rmap->n/bs; i++) {
3024       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3025     }
3026   }
3027   if (o_nnz) {
3028     for (i=0; i<B->rmap->n/bs; i++) {
3029       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3030     }
3031   }
3032 
3033   b = (Mat_MPIBAIJ*)B->data;
3034   b->bs2 = bs*bs;
3035   b->mbs = B->rmap->n/bs;
3036   b->nbs = B->cmap->n/bs;
3037   b->Mbs = B->rmap->N/bs;
3038   b->Nbs = B->cmap->N/bs;
3039 
3040   for (i=0; i<=b->size; i++) {
3041     b->rangebs[i] = B->rmap->range[i]/bs;
3042   }
3043   b->rstartbs = B->rmap->rstart/bs;
3044   b->rendbs   = B->rmap->rend/bs;
3045   b->cstartbs = B->cmap->rstart/bs;
3046   b->cendbs   = B->cmap->rend/bs;
3047 
3048   if (!B->preallocated) {
3049     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3050     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3051     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3052     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3053     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3054     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3055     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3056     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3057     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3058   }
3059 
3060   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3061   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3062   B->preallocated = PETSC_TRUE;
3063   PetscFunctionReturn(0);
3064 }
3065 EXTERN_C_END
3066 
3067 EXTERN_C_BEGIN
3068 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3069 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3070 EXTERN_C_END
3071 
3072 
3073 EXTERN_C_BEGIN
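/*
   MatConvert_MPIBAIJ_MPIAdj - Builds the adjacency structure of the block matrix: one
   vertex per block row, with diagonal blocks dropped and the diagonal- and off-diagonal-part
   block columns merged in increasing global block-column order.
*/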
3074 #undef __FUNCT__
3075 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3076 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3077 {
3078   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3079   PetscErrorCode ierr;
3080   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3081   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3082   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3083 
3084   PetscFunctionBegin;
3085   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3086   ii[0] = 0;
3087   CHKMEMQ;
3088   for (i=0; i<M; i++) {
3089     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3090     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3091     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3092     /* remove one from the count if this block row contains a diagonal block */
3093     for (j=id[i]; j<id[i+1]; j++) {
3094       if (jd[j] == i) {ii[i+1]--;break;}
3095     }
3096   CHKMEMQ;
3097   }
3098   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3099   cnt = 0;
3100   for (i=0; i<M; i++) {
3101     for (j=io[i]; j<io[i+1]; j++) {
3102       if (garray[jo[j]] > rstart) break;
3103       jj[cnt++] = garray[jo[j]];
3104   CHKMEMQ;
3105     }
3106     for (k=id[i]; k<id[i+1]; k++) {
3107       if (jd[k] != i) {
3108         jj[cnt++] = rstart + jd[k];
3109   CHKMEMQ;
3110       }
3111     }
3112     for (;j<io[i+1]; j++) {
3113       jj[cnt++] = garray[jo[j]];
3114   CHKMEMQ;
3115     }
3116   }
3117   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3118   PetscFunctionReturn(0);
3119 }
3120 EXTERN_C_END
3121 
3122 #include "../src/mat/impls/aij/mpi/mpiaij.h"
3123 EXTERN_C_BEGIN
3124 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3125 EXTERN_C_END
3126 
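/*
   MatConvert_MPIBAIJ_MPIAIJ - Converts by disassembling the parallel BAIJ matrix and
   converting its sequential diagonal and off-diagonal blocks with MatConvert_SeqBAIJ_SeqAIJ().
*/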
3127 EXTERN_C_BEGIN
3128 #undef __FUNCT__
3129 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3130 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3131 {
3132   PetscErrorCode ierr;
3133   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3134   Mat            B;
3135   Mat_MPIAIJ     *b;
3136 
3137   PetscFunctionBegin;
3138   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3139 
3140   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3141   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3142   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3143   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3144   b = (Mat_MPIAIJ*) B->data;
3145 
3146   ierr = MatDestroy(b->A);CHKERRQ(ierr);
3147   ierr = MatDestroy(b->B);CHKERRQ(ierr);
3148   ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3149   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3150   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3151   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3152   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3153   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3154   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3155   if (reuse == MAT_REUSE_MATRIX) {
3156     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3157   } else {
3158    *newmat = B;
3159   }
3160   PetscFunctionReturn(0);
3161 }
3162 EXTERN_C_END
3163 
3164 EXTERN_C_BEGIN
3165 #if defined(PETSC_HAVE_MUMPS)
3166 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3167 #endif
3168 EXTERN_C_END
3169 
3170 /*MC
3171    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3172 
3173    Options Database Keys:
3174 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3175 . -mat_block_size <bs> - set the blocksize used to store the matrix
3176 - -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly; <fact> is the table size factor
3177 
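   Example (an illustrative command line; the executable name is hypothetical):
.vb
     mpiexec -n 4 ./app -mat_type mpibaij -mat_block_size 3
.ve
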
3178   Level: beginner
3179 
3180 .seealso: MatCreateMPIBAIJ()
3181 M*/
3182 
3183 EXTERN_C_BEGIN
3184 #undef __FUNCT__
3185 #define __FUNCT__ "MatCreate_MPIBAIJ"
3186 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
3187 {
3188   Mat_MPIBAIJ    *b;
3189   PetscErrorCode ierr;
3190   PetscBool      flg;
3191 
3192   PetscFunctionBegin;
3193   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3194   B->data = (void*)b;
3195 
3196 
3197   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3198   B->mapping    = 0;
3199   B->assembled  = PETSC_FALSE;
3200 
3201   B->insertmode = NOT_SET_VALUES;
3202   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3203   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3204 
3205   /* build local table of row and column ownerships */
3206   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3207 
3208   /* build cache for off array entries formed */
3209   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3210   b->donotstash  = PETSC_FALSE;
3211   b->colmap      = PETSC_NULL;
3212   b->garray      = PETSC_NULL;
3213   b->roworiented = PETSC_TRUE;
3214 
3215   /* stuff used in block assembly */
3216   b->barray       = 0;
3217 
3218   /* stuff used for matrix vector multiply */
3219   b->lvec         = 0;
3220   b->Mvctx        = 0;
3221 
3222   /* stuff for MatGetRow() */
3223   b->rowindices   = 0;
3224   b->rowvalues    = 0;
3225   b->getrowactive = PETSC_FALSE;
3226 
3227   /* hash table stuff */
3228   b->ht           = 0;
3229   b->hd           = 0;
3230   b->ht_size      = 0;
3231   b->ht_flag      = PETSC_FALSE;
3232   b->ht_fact      = 0;
3233   b->ht_total_ct  = 0;
3234   b->ht_insert_ct = 0;
3235 
3236   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3237     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3238     if (flg) {
3239       PetscReal fact = 1.39;
3240       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3241       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3242       if (fact <= 1.0) fact = 1.39;
3243       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3244       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3245     }
3246   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3247 
3248 #if defined(PETSC_HAVE_MUMPS)
3249   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3250 #endif
3251   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3252                                      "MatConvert_MPIBAIJ_MPIAdj",
3253                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3254   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3255                                      "MatConvert_MPIBAIJ_MPIAIJ",
3256                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3257   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3258                                      "MatStoreValues_MPIBAIJ",
3259                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3260   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3261                                      "MatRetrieveValues_MPIBAIJ",
3262                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3263   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3264                                      "MatGetDiagonalBlock_MPIBAIJ",
3265                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3266   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3267                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3268                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3269   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3270 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3271 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3272   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3273                                      "MatDiagonalScaleLocal_MPIBAIJ",
3274                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3275   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3276                                      "MatSetHashTableFactor_MPIBAIJ",
3277                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3278   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3279   PetscFunctionReturn(0);
3280 }
3281 EXTERN_C_END
3282 
3283 /*MC
3284    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3285 
3286    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3287    and MATMPIBAIJ otherwise.
3288 
3289    Options Database Keys:
3290 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3291 
3292   Level: beginner
3293 
3294 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3295 M*/
3296 
3297 EXTERN_C_BEGIN
3298 #undef __FUNCT__
3299 #define __FUNCT__ "MatCreate_BAIJ"
3300 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3301 {
3302   PetscErrorCode ierr;
3303   PetscMPIInt    size;
3304 
3305   PetscFunctionBegin;
3306   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3307   if (size == 1) {
3308     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3309   } else {
3310     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3311   }
3312   PetscFunctionReturn(0);
3313 }
3314 EXTERN_C_END
3315 
3316 #undef __FUNCT__
3317 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3318 /*@C
3319    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3320    (block compressed row).  For good matrix assembly performance
3321    the user should preallocate the matrix storage by setting the parameters
3322    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3323    performance can be increased by more than a factor of 50.
3324 
3325    Collective on Mat
3326 
3327    Input Parameters:
3328 +  A - the matrix
3329 .  bs   - size of block
3330 .  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
3331            submatrix (same for all local rows)
3332 .  d_nnz - array containing the number of block nonzeros in the various block rows
3333            in the diagonal portion of the local submatrix (possibly different for each
3334            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3335 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
3336            submatrix (same for all local rows).
3337 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3338            off-diagonal portion of the local submatrix (possibly different for
3339            each block row) or PETSC_NULL.
3340 
3341    If the *_nnz parameter is given then the *_nz parameter is ignored
3342 
3343    Options Database Keys:
3344 +   -mat_block_size - size of the blocks to use
3345 -   -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly; <fact> is the table size factor
3346 
3347    Notes:
3348    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3349    then it must be used on all processors that share the object for that argument.
3350 
3351    Storage Information:
3352    For a square global matrix we define each processor's diagonal portion
3353    to be its local rows and the corresponding columns (a square submatrix);
3354    each processor's off-diagonal portion encompasses the remainder of the
3355    local matrix (a rectangular submatrix).
3356 
3357    The user can specify preallocated storage for the diagonal part of
3358    the local submatrix with either d_nz or d_nnz (not both).  Set
3359    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3360    memory allocation.  Likewise, specify preallocated storage for the
3361    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3362 
3363    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3364    the figure below we depict these three local rows and all columns (0-11).
3365 
3366 .vb
3367            0 1 2 3 4 5 6 7 8 9 10 11
3368           -------------------
3369    row 3  |  o o o d d d o o o o o o
3370    row 4  |  o o o d d d o o o o o o
3371    row 5  |  o o o d d d o o o o o o
3372           -------------------
3373 .ve
3374 
3375    Thus, any entries in the d locations are stored in the d (diagonal)
3376    submatrix, and any entries in the o locations are stored in the
3377    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3378    stored simply in the MATSEQBAIJ format for compressed row storage.
3379 
3380    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3381    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3382    In general, for PDE problems in which most nonzeros are near the diagonal,
3383    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3384    or you will get TERRIBLE performance; see the users' manual chapter on
3385    matrices.
3386 
3387    You can call MatGetInfo() to get information on how effective the preallocation was,
3388    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3389    You can also run with the option -info and look for messages with the string
3390    malloc in them to see if additional memory allocation was needed.
3391 
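   Example (a minimal sketch; comm, the sizes m,n,M,N, and the preallocation counts are
   illustrative and come from the application):
.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
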
3392    Level: intermediate
3393 
3394 .keywords: matrix, block, aij, compressed row, sparse, parallel
3395 
3396 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3397 @*/
3398 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3399 {
3400   PetscErrorCode ierr;
3401 
3402   PetscFunctionBegin;
3403   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3404   PetscFunctionReturn(0);
3405 }
3406 
3407 #undef __FUNCT__
3408 #define __FUNCT__ "MatCreateMPIBAIJ"
3409 /*@C
3410    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3411    (block compressed row).  For good matrix assembly performance
3412    the user should preallocate the matrix storage by setting the parameters
3413    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3414    performance can be increased by more than a factor of 50.
3415 
3416    Collective on MPI_Comm
3417 
3418    Input Parameters:
3419 +  comm - MPI communicator
3420 .  bs   - size of block
3421 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3422            This value should be the same as the local size used in creating the
3423            y vector for the matrix-vector product y = Ax.
3424 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
3425            This value should be the same as the local size used in creating the
3426            x vector for the matrix-vector product y = Ax.
3427 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3428 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3429 .  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
3430            submatrix (same for all local rows)
3431 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3432            in the diagonal portion of the local submatrix (possibly different for each
3433            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3434 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
3435            submatrix (same for all local rows).
3436 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3437            off-diagonal portion of the local submatrix (possibly different for
3438            each block row) or PETSC_NULL.
3439 
3440    Output Parameter:
3441 .  A - the matrix
3442 
3443    Options Database Keys:
3444 +   -mat_block_size - size of the blocks to use
3445 -   -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly; <fact> is the table size factor
3446 
3447    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3448    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3449    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3450 
3451    Notes:
3452    If the *_nnz parameter is given then the *_nz parameter is ignored
3453 
3454    A nonzero block is any block that has 1 or more nonzeros in it
3455 
3456    The user MUST specify either the local or global matrix dimensions
3457    (possibly both).
3458 
3459    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3460    then it must be used on all processors that share the object for that argument.
3461 
3462    Storage Information:
3463    For a square global matrix we define each processor's diagonal portion
3464    to be its local rows and the corresponding columns (a square submatrix);
3465    each processor's off-diagonal portion encompasses the remainder of the
3466    local matrix (a rectangular submatrix).
3467 
3468    The user can specify preallocated storage for the diagonal part of
3469    the local submatrix with either d_nz or d_nnz (not both).  Set
3470    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3471    memory allocation.  Likewise, specify preallocated storage for the
3472    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3473 
3474    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3475    the figure below we depict these three local rows and all columns (0-11).
3476 
3477 .vb
3478            0 1 2 3 4 5 6 7 8 9 10 11
3479           -------------------
3480    row 3  |  o o o d d d o o o o o o
3481    row 4  |  o o o d d d o o o o o o
3482    row 5  |  o o o d d d o o o o o o
3483           -------------------
3484 .ve
3485 
3486    Thus, any entries in the d locations are stored in the d (diagonal)
3487    submatrix, and any entries in the o locations are stored in the
3488    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3489    stored simply in the MATSEQBAIJ format for compressed row storage.
3490 
3491    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3492    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3493    In general, for PDE problems in which most nonzeros are near the diagonal,
3494    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3495    or you will get TERRIBLE performance; see the users' manual chapter on
3496    matrices.
3497 
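   Example (a minimal sketch; the block size and preallocation counts are illustrative):
.vb
      ierr = MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
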
3498    Level: intermediate
3499 
3500 .keywords: matrix, block, aij, compressed row, sparse, parallel
3501 
3502 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3503 @*/
3504 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3505 {
3506   PetscErrorCode ierr;
3507   PetscMPIInt    size;
3508 
3509   PetscFunctionBegin;
3510   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3511   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3512   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3513   if (size > 1) {
3514     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3515     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3516   } else {
3517     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3518     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3519   }
3520   PetscFunctionReturn(0);
3521 }
3522 
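/*
   MatDuplicate_MPIBAIJ - Duplicates the layouts, blocksize data, communication scatter and
   both sequential blocks; the hash table and the MatGetRow() work arrays are not copied
   but rebuilt on demand.
*/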
3523 #undef __FUNCT__
3524 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3525 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3526 {
3527   Mat            mat;
3528   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3529   PetscErrorCode ierr;
3530   PetscInt       len=0;
3531 
3532   PetscFunctionBegin;
3533   *newmat       = 0;
3534   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3535   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3536   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3537   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3538 
3539   mat->factortype   = matin->factortype;
3540   mat->preallocated = PETSC_TRUE;
3541   mat->assembled    = PETSC_TRUE;
3542   mat->insertmode   = NOT_SET_VALUES;
3543 
3544   a      = (Mat_MPIBAIJ*)mat->data;
3545   mat->rmap->bs  = matin->rmap->bs;
3546   a->bs2   = oldmat->bs2;
3547   a->mbs   = oldmat->mbs;
3548   a->nbs   = oldmat->nbs;
3549   a->Mbs   = oldmat->Mbs;
3550   a->Nbs   = oldmat->Nbs;
3551 
3552   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3553   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3554 
3555   a->size         = oldmat->size;
3556   a->rank         = oldmat->rank;
3557   a->donotstash   = oldmat->donotstash;
3558   a->roworiented  = oldmat->roworiented;
3559   a->rowindices   = 0;
3560   a->rowvalues    = 0;
3561   a->getrowactive = PETSC_FALSE;
3562   a->barray       = 0;
3563   a->rstartbs     = oldmat->rstartbs;
3564   a->rendbs       = oldmat->rendbs;
3565   a->cstartbs     = oldmat->cstartbs;
3566   a->cendbs       = oldmat->cendbs;
3567 
3568   /* hash table stuff */
3569   a->ht           = 0;
3570   a->hd           = 0;
3571   a->ht_size      = 0;
3572   a->ht_flag      = oldmat->ht_flag;
3573   a->ht_fact      = oldmat->ht_fact;
3574   a->ht_total_ct  = 0;
3575   a->ht_insert_ct = 0;
3576 
3577   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3578   if (oldmat->colmap) {
3579 #if defined (PETSC_USE_CTABLE)
3580   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3581 #else
3582   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3583   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3584   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3585 #endif
3586   } else a->colmap = 0;
3587 
3588   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3589     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3590     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3591     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3592   } else a->garray = 0;
3593 
3594   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3595   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3596   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3597   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3598   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3599 
3600   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3601   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3602   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3603   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3604   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3605   *newmat = mat;
3606 
3607   PetscFunctionReturn(0);
3608 }
3609 
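/*
   MatLoad_MPIBAIJ - Process 0 reads the binary header, row lengths, column indices and
   numerical values, shipping each remote process its contiguous range of rows; the matrix
   is padded with identity rows when the global size is not divisible by the blocksize.
*/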
3610 #undef __FUNCT__
3611 #define __FUNCT__ "MatLoad_MPIBAIJ"
3612 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3613 {
3614   PetscErrorCode ierr;
3615   int            fd;
3616   PetscInt       i,nz,j,rstart,rend;
3617   PetscScalar    *vals,*buf;
3618   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3619   MPI_Status     status;
3620   PetscMPIInt    rank,size,maxnz;
3621   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3622   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3623   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3624   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3625   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3626   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3627 
3628   PetscFunctionBegin;
3629   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3630     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3631   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3632 
3633   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3634   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3635   if (!rank) {
3636     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3637     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3638     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3639   }
3640 
3641   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3642 
3643   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3644   M = header[1]; N = header[2];
3645 
3646   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3647   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3648   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3649 
3650   /* If global sizes are set, check if they are consistent with that given in the file */
3651   if (sizesset) {
3652     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3653   }
3654   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3655   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3656 
3657   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3658 
3659   /*
3660      This code adds extra rows to make sure the number of rows is
3661      divisible by the blocksize
3662   */
3663   Mbs        = M/bs;
3664   extra_rows = bs - M + bs*Mbs;
3665   if (extra_rows == bs) extra_rows = 0;
3666   else                  Mbs++;
3667   if (extra_rows && !rank) {
3668     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3669   }
3670 
3671   /* determine ownership of all rows */
3672   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3673     mbs        = Mbs/size + ((Mbs % size) > rank);
3674     m          = mbs*bs;
3675   } else { /* User set */
3676     m          = newmat->rmap->n;
3677     mbs        = m/bs;
3678   }
3679   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3680   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3681 
3682   /* process 0 needs enough room for process with most rows */
3683   if (!rank) {
3684     mmax = rowners[1];
3685     for (i=2; i<size; i++) {
3686       mmax = PetscMax(mmax,rowners[i]);
3687     }
3688     mmax*=bs;
3689   } else mmax = m;
3690 
3691   rowners[0] = 0;
3692   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3693   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3694   rstart = rowners[rank];
3695   rend   = rowners[rank+1];
3696 
3697   /* distribute row lengths to all processors */
3698   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3699   if (!rank) {
3700     mend = m;
3701     if (size == 1) mend = mend - extra_rows;
3702     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3703     for (j=mend; j<m; j++) locrowlens[j] = 1;
3704     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3705     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3706     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3707     for (j=0; j<m; j++) {
3708       procsnz[0] += locrowlens[j];
3709     }
3710     for (i=1; i<size; i++) {
3711       mend = browners[i+1] - browners[i];
3712       if (i == size-1) mend = mend - extra_rows;
3713       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3714       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3715       /* calculate the number of nonzeros on each processor */
3716       for (j=0; j<browners[i+1]-browners[i]; j++) {
3717         procsnz[i] += rowlengths[j];
3718       }
3719       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3720     }
3721     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3722   } else {
3723     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3724   }
3725 
3726   if (!rank) {
3727     /* determine max buffer needed and allocate it */
3728     maxnz = procsnz[0];
3729     for (i=1; i<size; i++) {
3730       maxnz = PetscMax(maxnz,procsnz[i]);
3731     }
3732     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3733 
3734     /* read in my part of the matrix column indices  */
3735     nz     = procsnz[0];
3736     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3737     mycols = ibuf;
3738     if (size == 1)  nz -= extra_rows;
3739     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3740     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3741 
3742     /* read in everyone else's rows (except the last proc's) and ship them off */
3743     for (i=1; i<size-1; i++) {
3744       nz   = procsnz[i];
3745       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3746       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3747     }
3748     /* read in the stuff for the last proc */
3749     if (size != 1) {
3750       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3751       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3752       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3753       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3754     }
3755     ierr = PetscFree(cols);CHKERRQ(ierr);
3756   } else {
3757     /* determine buffer space needed for message */
3758     nz = 0;
3759     for (i=0; i<m; i++) {
3760       nz += locrowlens[i];
3761     }
3762     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3763     mycols = ibuf;
3764     /* receive message of column indices */
3765     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3766     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3767     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3768   }
3769 
3770   /* loop over local rows, determining number of off diagonal entries */
3771   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3772   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3773   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3774   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3775   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3776   rowcount = 0; nzcount = 0;
3777   for (i=0; i<mbs; i++) {
3778     dcount  = 0;
3779     odcount = 0;
3780     for (j=0; j<bs; j++) {
3781       kmax = locrowlens[rowcount];
3782       for (k=0; k<kmax; k++) {
3783         tmp = mycols[nzcount++]/bs;
3784         if (!mask[tmp]) {
3785           mask[tmp] = 1;
3786           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3787           else masked1[dcount++] = tmp;
3788         }
3789       }
3790       rowcount++;
3791     }
3792 
3793     dlens[i]  = dcount;
3794     odlens[i] = odcount;
3795 
3796     /* zero out the mask elements we set */
3797     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3798     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3799   }
3800 
3801 
3802   if (!sizesset) {
3803     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3804   }
3805   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3806 
3807   if (!rank) {
3808     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3809     /* read in my part of the matrix numerical values  */
3810     nz = procsnz[0];
3811     vals = buf;
3812     mycols = ibuf;
3813     if (size == 1)  nz -= extra_rows;
3814     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3815     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3816 
3817     /* insert into matrix */
3818     jj      = rstart*bs;
3819     for (i=0; i<m; i++) {
3820       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3821       mycols += locrowlens[i];
3822       vals   += locrowlens[i];
3823       jj++;
3824     }
3825     /* read in the other processors' values (except the last one) and ship them out */
3826     for (i=1; i<size-1; i++) {
3827       nz   = procsnz[i];
3828       vals = buf;
3829       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3830       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3831     }
3832     /* the last proc */
3833     if (size != 1){
3834       nz   = procsnz[i] - extra_rows;
3835       vals = buf;
3836       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3837       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3838       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3839     }
3840     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3841   } else {
3842     /* receive numeric values */
3843     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3844 
3845     /* receive message of values */
3846     vals   = buf;
3847     mycols = ibuf;
3848     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3849     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3850     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3851 
3852     /* insert into matrix */
3853     jj      = rstart*bs;
3854     for (i=0; i<m; i++) {
3855       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3856       mycols += locrowlens[i];
3857       vals   += locrowlens[i];
3858       jj++;
3859     }
3860   }
3861   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3862   ierr = PetscFree(buf);CHKERRQ(ierr);
3863   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3864   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3865   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3866   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3867   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3868   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3869 
3870   PetscFunctionReturn(0);
3871 }
3872 
3873 #undef __FUNCT__
3874 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3875 /*@
3876    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3877 
3878    Input Parameters:
3879 +  mat  - the matrix
3880 -  fact - factor
3881 
3882    Not Collective, each process can use a different factor
3883 
3884    Level: advanced
3885 
3886   Notes:
3887    This can also be set by the command line option: -mat_use_hash_table <fact>
3888 
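   Example (illustrative; 1.39 is the default factor used when the hash table is enabled
   from the options database):
.vb
      ierr = MatMPIBAIJSetHashTableFactor(mat,1.39);CHKERRQ(ierr);
.ve
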
3889 .keywords: matrix, hashtable, factor, HT
3890 
3891 .seealso: MatSetOption()
3892 @*/
3893 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3894 {
3895   PetscErrorCode ierr;
3896 
3897   PetscFunctionBegin;
3898   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3899   PetscFunctionReturn(0);
3900 }
3901 
3902 EXTERN_C_BEGIN
3903 #undef __FUNCT__
3904 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3905 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3906 {
3907   Mat_MPIBAIJ *baij;
3908 
3909   PetscFunctionBegin;
3910   baij = (Mat_MPIBAIJ*)mat->data;
3911   baij->ht_fact = fact;
3912   PetscFunctionReturn(0);
3913 }
3914 EXTERN_C_END
3915 
3916 #undef __FUNCT__
3917 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
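/* Gives direct access to the sequential diagonal block Ad, the sequential off-diagonal
   block Ao, and colmap[] (= garray), which maps local block columns of Ao to global
   block columns */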
3918 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3919 {
3920   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3921   PetscFunctionBegin;
3922   *Ad     = a->A;
3923   *Ao     = a->B;
3924   *colmap = a->garray;
3925   PetscFunctionReturn(0);
3926 }
3927 
3928 /*
3929     Special version for direct calls from Fortran (to eliminate two function call overheads)
3930 */
3931 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3932 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3933 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3934 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3935 #endif
3936 
3937 #undef __FUNCT__
3938 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3939 /*@C
3940   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3941 
3942   Collective on Mat
3943 
3944   Input Parameters:
3945 + mat - the matrix
3946 . min - number of input rows
3947 . im - input rows
3948 . nin - number of input columns
3949 . in - input columns
3950 . v - numerical values input
3951 - addvin - INSERT_VALUES or ADD_VALUES
3952 
3953   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3954 
3955   Level: advanced
3956 
3957 .seealso:   MatSetValuesBlocked()
3958 @*/
3959 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3960 {
3961   /* convert input arguments to C version */
3962   Mat             mat = *matin;
3963   PetscInt        m = *min, n = *nin;
3964   InsertMode      addv = *addvin;
3965 
3966   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3967   const MatScalar *value;
3968   MatScalar       *barray=baij->barray;
3969   PetscBool       roworiented = baij->roworiented;
3970   PetscErrorCode  ierr;
3971   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3972   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3973   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3974 
3975   PetscFunctionBegin;
3976   /* tasks normally handled by MatSetValuesBlocked() */
3977   if (mat->insertmode == NOT_SET_VALUES) {
3978     mat->insertmode = addv;
3979   }
3980 #if defined(PETSC_USE_DEBUG)
3981   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3982   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3983 #endif
3984   if (mat->assembled) {
3985     mat->was_assembled = PETSC_TRUE;
3986     mat->assembled     = PETSC_FALSE;
3987   }
3988   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3989 
3990 
3991   if (!barray) {
3992     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3993     baij->barray = barray;
3994   }
3995 
3996   if (roworiented) {
3997     stepval = (n-1)*bs;
3998   } else {
3999     stepval = (m-1)*bs;
4000   }
4001   for (i=0; i<m; i++) {
4002     if (im[i] < 0) continue;
4003 #if defined(PETSC_USE_DEBUG)
4004     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4005 #endif
4006     if (im[i] >= rstart && im[i] < rend) {
4007       row = im[i] - rstart;
4008       for (j=0; j<n; j++) {
4009         /* If NumCol = 1 then a copy is not required */
4010         if ((roworiented) && (n == 1)) {
4011           barray = (MatScalar*)v + i*bs2;
4012         } else if((!roworiented) && (m == 1)) {
4013           barray = (MatScalar*)v + j*bs2;
4014         } else { /* Here a copy is required */
4015           if (roworiented) {
4016             value = v + i*(stepval+bs)*bs + j*bs;
4017           } else {
4018             value = v + j*(stepval+bs)*bs + i*bs;
4019           }
4020           for (ii=0; ii<bs; ii++,value+=stepval) {
4021             for (jj=0; jj<bs; jj++) {
4022               *barray++  = *value++;
4023             }
4024           }
4025           barray -= bs2;
4026         }
4027 
4028         if (in[j] >= cstart && in[j] < cend){
4029           col  = in[j] - cstart;
4030           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4031         }
4032         else if (in[j] < 0) continue;
4033 #if defined(PETSC_USE_DEBUG)
4034         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4035 #endif
4036         else {
4037           if (mat->was_assembled) {
4038             if (!baij->colmap) {
4039               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4040             }
4041 
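            /* colmap (a PetscTable or a plain array) maps a global block
               column present in the off-diagonal part B to 1 + its first
               scalar column in B's compressed local numbering; an entry of 0
               means the block column has not been seen */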
4042 #if defined(PETSC_USE_DEBUG)
4043 #if defined (PETSC_USE_CTABLE)
4044             { PetscInt data;
4045               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4046               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4047             }
4048 #else
4049             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4050 #endif
4051 #endif
4052 #if defined (PETSC_USE_CTABLE)
4053             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4054             col  = (col - 1)/bs;
4055 #else
4056             col = (baij->colmap[in[j]] - 1)/bs;
4057 #endif
4058             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4059               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4060               col = in[j];
4061             }
4062           }
4063           else col = in[j];
4064           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4065         }
4066       }
4067     } else {
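      /* row im[i] is owned by another process: stash the values locally; they
         are shipped to the owning process during MatAssemblyBegin/End() */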
4068       if (!baij->donotstash) {
4069         if (roworiented) {
4070           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4071         } else {
4072           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4073         }
4074       }
4075     }
4076   }
4077 
4078   /* task normally handled by MatSetValuesBlocked() */
4079   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4080   PetscFunctionReturn(0);
4081 }
4082 
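/*
   A minimal usage sketch (illustrative only, not part of the library source):
   from Fortran the routine above is reached directly through the mangled
   symbol set up by the #define block preceding it, with the same argument
   meanings as MatSetValuesBlocked(). Assuming the no-trailing-underscore
   mangling and a matrix already known to be of type MATMPIBAIJ, a caller
   might write (names here are hypothetical):

       PetscInt    one,row(1),col(1)
       PetscScalar vblock(bs*bs)
       ...
       one = 1
       call matmpibaijsetvaluesblocked(A,one,row,one,col,vblock,ADD_VALUES)

   This skips the function-pointer dispatch inside MatSetValuesBlocked() but
   discards the returned error code, trading safety for call overhead.
*/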
4083 #undef __FUNCT__
4084 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4085 /*@
4086      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local
4087          rows in standard CSR format.
4088 
4089    Collective on MPI_Comm
4090 
4091    Input Parameters:
4092 +  comm - MPI communicator
4093 .  bs - the block size (currently only a block size of 1 is supported)
4094 .  m - number of local rows (Cannot be PETSC_DECIDE)
4095 .  n - number of local columns; this should be the same as the local size used in
4096        creating the x vector for the matrix-vector product y = Ax (or PETSC_DECIDE
4097        to have it calculated if N is given). For square matrices n is almost always m.
4098 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4099 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4100 .  i - row offsets (CSR format; i[0] must be 0)
4101 .  j - column indices
4102 -  a - matrix values
4103 
4104    Output Parameter:
4105 .   mat - the matrix
4106 
4107    Level: intermediate
4108 
4109    Notes:
4110        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4111      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4112      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4113 
4114        The i and j indices are 0 based; for each local row, i[] gives the offset of that row's first entry in the j[] and a[] arrays.
4115 
4116 .keywords: matrix, baij, compressed row, sparse, parallel
4117 
4118 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4119           MPIBAIJ, MatCreateMPIBAIJ(), MatCreateMPIAIJWithSplitArrays()
4120 @*/
4121 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4122 {
4123   PetscErrorCode ierr;
4124 
4125   PetscFunctionBegin;
4127   if (i[0]) {
4128     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4129   }
4130   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4131   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4132   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4133   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4134   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4135   PetscFunctionReturn(0);
4136 }
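/*
   A minimal usage sketch (illustrative only, not part of the library source):
   each process supplies its local rows in 0-based CSR form. With bs = 1, a
   single process, and the 2 x 2 matrix [2 -1; -1 2], hypothetical arrays
   ii, jj, aa might look like

       PetscInt    ii[] = {0,2,4};              (row offsets into jj[]/aa[])
       PetscInt    jj[] = {0,1,0,1};            (global column indices)
       PetscScalar aa[] = {2.0,-1.0,-1.0,2.0};
       Mat         A;

       ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);

   On several processes jj[] must hold true global column indices for the
   locally owned rows. Because the arrays are copied, ii, jj, and aa may be
   freed or reused as soon as the call returns.
*/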
4137