xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 047240e14af00aad1ef65e96f6fface8924f7f7e)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {
29       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
30     }
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++) {
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
40       va[i] = vb[i];
41       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
42     }
43   }
44 
45   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
46   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
47   ierr = PetscFree(idxb);CHKERRQ(ierr);
48   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
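/*
   Usage sketch (illustrative, not part of this file): MatGetRowMaxAbs() fills v
   with the largest absolute value in each local row and, if idx is non-null,
   records the global column where that maximum occurs.  Assuming m local rows,

       Vec      v;
       PetscInt *idx;
       ierr = VecCreateMPI(PETSC_COMM_WORLD,m,PETSC_DECIDE,&v);CHKERRQ(ierr);
       ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
       ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);

   The names v, idx and m are placeholders; passing PETSC_NULL for idx skips the
   column indices, as the routine above allows.
*/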
52 EXTERN_C_BEGIN
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 EXTERN_C_END
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
70 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
77   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
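/*
   Usage sketch (illustrative, not part of this file): MatStoreValues()/
   MatRetrieveValues() snapshot and restore the numerical values of a matrix
   whose nonzero pattern is fixed, e.g. inside an outer iteration:

       ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
       ierr = MatStoreValues(mat);CHKERRQ(ierr);
       ...  change values with MatSetValues() + MatAssemblyBegin()/End()  ...
       ierr = MatRetrieveValues(mat);CHKERRQ(ierr);

   For the MPI format the two routines above simply forward the operation to the
   diagonal (A) and off-diagonal (B) sequential blocks.
*/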
82 /*
83      Local utility routine that creates a mapping from the global block column
84    number to its local position in the off-diagonal part of the local
85    storage of the matrix.  This is done in a non-scalable way since the
86    length of colmap equals the global number of block columns.
87 */
88 #undef __FUNCT__
89 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
90 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
91 {
92   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
93   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
94   PetscErrorCode ierr;
95   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
96 
97   PetscFunctionBegin;
98 #if defined(PETSC_USE_CTABLE)
99   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
100   for (i=0; i<nbs; i++) {
101     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
102   }
103 #else
104   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
105   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
106   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
107   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
108 #endif
109   PetscFunctionReturn(0);
110 }
111 
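/*
   Worked example (hypothetical values): with bs = 2 and garray = {3,7} the
   off-diagonal block B has local block columns 0 and 1 corresponding to global
   block columns 3 and 7, so the routine above records

       colmap[3] = 0*bs + 1 = 1      colmap[7] = 1*bs + 1 = 3

   (shifted by one so that a stored value of 0 means "block column not present").
   A later lookup for a global point column c recovers the local point column as
   (colmap[c/bs] - 1) + c%bs, which is exactly how MatSetValues_MPIBAIJ() uses it.
*/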
112 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
113   { \
114  \
115     brow = row/bs;  \
116     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
117     rmax = aimax[brow]; nrow = ailen[brow]; \
118     bcol = col/bs; \
119     ridx = row % bs; cidx = col % bs; \
120     low  = 0; high = nrow; \
121     while (high-low > 3) { \
122       t = (low+high)/2; \
123       if (rp[t] > bcol) high = t; \
124       else              low  = t; \
125     } \
126     for (_i=low; _i<high; _i++) { \
127       if (rp[_i] > bcol) break; \
128       if (rp[_i] == bcol) { \
129         bap = ap +  bs2*_i + bs*cidx + ridx; \
130         if (addv == ADD_VALUES) *bap += value;  \
131         else                    *bap  = value;  \
132         goto a_noinsert; \
133       } \
134     } \
135     if (a->nonew == 1) goto a_noinsert; \
136     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
137     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
138     N = nrow++ - 1;  \
139     /* shift up all the later entries in this row */ \
140     for (ii=N; ii>=_i; ii--) { \
141       rp[ii+1] = rp[ii]; \
142       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
143     } \
144     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
145     rp[_i]                      = bcol;  \
146     ap[bs2*_i + bs*cidx + ridx] = value;  \
147 a_noinsert:; \
148     ailen[brow] = nrow; \
149   }
150 
151 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
152   { \
153     brow = row/bs;  \
154     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
155     rmax = bimax[brow]; nrow = bilen[brow]; \
156     bcol = col/bs; \
157     ridx = row % bs; cidx = col % bs; \
158     low  = 0; high = nrow; \
159     while (high-low > 3) { \
160       t = (low+high)/2; \
161       if (rp[t] > bcol) high = t; \
162       else              low  = t; \
163     } \
164     for (_i=low; _i<high; _i++) { \
165       if (rp[_i] > bcol) break; \
166       if (rp[_i] == bcol) { \
167         bap = ap +  bs2*_i + bs*cidx + ridx; \
168         if (addv == ADD_VALUES) *bap += value;  \
169         else                    *bap  = value;  \
170         goto b_noinsert; \
171       } \
172     } \
173     if (b->nonew == 1) goto b_noinsert; \
174     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
175     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
176     CHKMEMQ; \
177     N = nrow++ - 1;  \
178     /* shift up all the later entries in this row */ \
179     for (ii=N; ii>=_i; ii--) { \
180       rp[ii+1] = rp[ii]; \
181       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
182     } \
183     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
184     rp[_i]                      = bcol;  \
185     ap[bs2*_i + bs*cidx + ridx] = value;  \
186 b_noinsert:; \
187     bilen[brow] = nrow; \
188   }
189 
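/*
   Addressing sketch (hypothetical values): both macros above store each bs x bs
   block in column-major order, so point entry (ridx,cidx) of stored block _i
   lives at

       ap[bs2*_i + bs*cidx + ridx]

   For bs = 3 (bs2 = 9), entry ridx = 2, cidx = 1 of the second block (_i = 1)
   is ap[9 + 3 + 2] = ap[14].  The search for the block column is a short
   bisection that stops once high-low <= 3, followed by a linear scan.
*/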
190 #undef __FUNCT__
191 #define __FUNCT__ "MatSetValues_MPIBAIJ"
192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
193 {
194   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
195   MatScalar      value;
196   PetscBool      roworiented = baij->roworiented;
197   PetscErrorCode ierr;
198   PetscInt       i,j,row,col;
199   PetscInt       rstart_orig=mat->rmap->rstart;
200   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
201   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
202 
203   /* Some variables required by the MatSetValues_SeqBAIJ_*_Private() macros above */
204   Mat         A     = baij->A;
205   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
206   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
207   MatScalar   *aa   =a->a;
208 
209   Mat         B     = baij->B;
210   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
211   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
212   MatScalar   *ba   =b->a;
213 
214   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
215   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
216   MatScalar *ap,*bap;
217 
218   PetscFunctionBegin;
219   if (v) PetscValidScalarPointer(v,6);
220   for (i=0; i<m; i++) {
221     if (im[i] < 0) continue;
222 #if defined(PETSC_USE_DEBUG)
223     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
224 #endif
225     if (im[i] >= rstart_orig && im[i] < rend_orig) {
226       row = im[i] - rstart_orig;
227       for (j=0; j<n; j++) {
228         if (in[j] >= cstart_orig && in[j] < cend_orig) {
229           col = in[j] - cstart_orig;
230           if (roworiented) value = v[i*n+j];
231           else             value = v[i+j*m];
232           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
233           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
234         } else if (in[j] < 0) continue;
235 #if defined(PETSC_USE_DEBUG)
236         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
237 #endif
238         else {
239           if (mat->was_assembled) {
240             if (!baij->colmap) {
241               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
242             }
243 #if defined(PETSC_USE_CTABLE)
244             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
245             col  = col - 1;
246 #else
247             col = baij->colmap[in[j]/bs] - 1;
248 #endif
249             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
250               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
251               col  =  in[j];
252               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
253               B    = baij->B;
254               b    = (Mat_SeqBAIJ*)(B)->data;
255               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
256               ba   =b->a;
257             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
258             else col += in[j]%bs;
259           } else col = in[j];
260           if (roworiented) value = v[i*n+j];
261           else             value = v[i+j*m];
262           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
263           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
264         }
265       }
266     } else {
267       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
268       if (!baij->donotstash) {
269         mat->assembled = PETSC_FALSE;
270         if (roworiented) {
271           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
272         } else {
273           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
274         }
275       }
276     }
277   }
278   PetscFunctionReturn(0);
279 }
280 
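/*
   Usage sketch (illustrative, not part of this file): callers pass global point
   indices; entries owned by other processes are stashed and exchanged during
   assembly, e.g.

       PetscInt    row = 5, cols[2] = {5,11};
       PetscScalar vals[2] = {4.0,-1.0};
       ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   (the row and column numbers are hypothetical).  Locally owned entries are
   routed to the diagonal block A or the off-diagonal block B by the code above.
*/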
281 #undef __FUNCT__
282 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
283 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
284 {
285   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
286   const PetscScalar *value;
287   MatScalar         *barray     = baij->barray;
288   PetscBool         roworiented = baij->roworiented;
289   PetscErrorCode    ierr;
290   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
291   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
292   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
293 
294   PetscFunctionBegin;
295   if (!barray) {
296     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
297     baij->barray = barray;
298   }
299 
300   if (roworiented) stepval = (n-1)*bs;
301   else stepval = (m-1)*bs;
302 
303   for (i=0; i<m; i++) {
304     if (im[i] < 0) continue;
305 #if defined(PETSC_USE_DEBUG)
306     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
307 #endif
308     if (im[i] >= rstart && im[i] < rend) {
309       row = im[i] - rstart;
310       for (j=0; j<n; j++) {
311         /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is already contiguous, so a copy is not required */
312         if ((roworiented) && (n == 1)) {
313           barray = (MatScalar*)v + i*bs2;
314         } else if ((!roworiented) && (m == 1)) {
315           barray = (MatScalar*)v + j*bs2;
316         } else { /* Here a copy is required */
317           if (roworiented) {
318             value = v + (i*(stepval+bs) + j)*bs;
319           } else {
320             value = v + (j*(stepval+bs) + i)*bs;
321           }
322           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
323             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
324             barray += bs;
325           }
326           barray -= bs2;
327         }
328 
329         if (in[j] >= cstart && in[j] < cend) {
330           col  = in[j] - cstart;
331           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
332         } else if (in[j] < 0) continue;
333 #if defined(PETSC_USE_DEBUG)
334         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
335 #endif
336         else {
337           if (mat->was_assembled) {
338             if (!baij->colmap) {
339               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
340             }
341 
342 #if defined(PETSC_USE_DEBUG)
343 #if defined(PETSC_USE_CTABLE)
344             { PetscInt data;
345               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
346               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
347             }
348 #else
349             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
350 #endif
351 #endif
352 #if defined(PETSC_USE_CTABLE)
353             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
354             col  = (col - 1)/bs;
355 #else
356             col = (baij->colmap[in[j]] - 1)/bs;
357 #endif
358             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
359               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
360               col  =  in[j];
361             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
362           } else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
379 
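/*
   Usage sketch (illustrative, not part of this file): MatSetValuesBlocked()
   takes *block* indices and dense bs x bs blocks.  For bs = 2, inserting one
   block at (hypothetical) block row 3, block column 4:

       PetscInt    brow = 3, bcol = 4;
       PetscScalar block[4] = {1.0,2.0,3.0,4.0};
       ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);

   The copy into baij->barray above only gathers the (possibly strided) block
   into contiguous storage before handing it to the sequential BAIJ routines.
*/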
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
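/*
   Hash sketch (hypothetical numbers): HASH() is a multiplicative hash based on
   the golden-ratio constant.  For size = 97 and key = 12345,

       tmp = 12345*0.6180339887 ~= 7629.63,  fractional part ~= 0.63
       h1  = (PetscInt)(97*0.63) = 61

   so the lookup below starts at slot 61 and probes linearly (wrapping to the
   start of the table) until the key is found.
*/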
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
389   PetscBool      roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408     row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j];
413         else             value = v[i+j*m];
414         /* Look up into the hash table */
415         key = (row/bs)*Nbs+(col/bs)+1;
416         h1  = HASH(size,key,tmp);
417 
418 
419         idx = h1;
420 #if defined(PETSC_USE_DEBUG)
421         insert_ct++;
422         total_ct++;
423         if (HT[idx] != key) {
424           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
425           if (idx == size) {
426             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
427             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428           }
429         }
430 #else
431         if (HT[idx] != key) {
432           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
433           if (idx == size) {
434             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
435             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else if (!baij->donotstash) {
444       if (roworiented) {
445         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
446       } else {
447         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
448       }
449     }
450   }
451 #if defined(PETSC_USE_DEBUG)
452   baij->ht_total_ct  = total_ct;
453   baij->ht_insert_ct = insert_ct;
454 #endif
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
460 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
461 {
462   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
463   PetscBool         roworiented = baij->roworiented;
464   PetscErrorCode    ierr;
465   PetscInt          i,j,ii,jj,row,col;
466   PetscInt          rstart=baij->rstartbs;
467   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
468   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
469   PetscReal         tmp;
470   MatScalar         **HD = baij->hd,*baij_a;
471   const PetscScalar *v_t,*value;
472 #if defined(PETSC_USE_DEBUG)
473   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
474 #endif
475 
476   PetscFunctionBegin;
477   if (roworiented) stepval = (n-1)*bs;
478   else stepval = (m-1)*bs;
479 
480   for (i=0; i<m; i++) {
481 #if defined(PETSC_USE_DEBUG)
482     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
483     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
484 #endif
485     row = im[i];
486     v_t = v + i*nbs2;
487     if (row >= rstart && row < rend) {
488       for (j=0; j<n; j++) {
489         col = in[j];
490 
491         /* Look up into the Hash Table */
492         key = row*Nbs+col+1;
493         h1  = HASH(size,key,tmp);
494 
495         idx = h1;
496 #if defined(PETSC_USE_DEBUG)
497         total_ct++;
498         insert_ct++;
499         if (HT[idx] != key) {
500           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
501           if (idx == size) {
502             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
503             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
504           }
505         }
506 #else
507         if (HT[idx] != key) {
508           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
509           if (idx == size) {
510             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
511             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
512           }
513         }
514 #endif
515         baij_a = HD[idx];
516         if (roworiented) {
517           /*value = v + i*(stepval+bs)*bs + j*bs;*/
518           /* value = v + (i*(stepval+bs)+j)*bs; */
519           value = v_t;
520           v_t  += bs;
521           if (addv == ADD_VALUES) {
522             for (ii=0; ii<bs; ii++,value+=stepval) {
523               for (jj=ii; jj<bs2; jj+=bs) {
524                 baij_a[jj] += *value++;
525               }
526             }
527           } else {
528             for (ii=0; ii<bs; ii++,value+=stepval) {
529               for (jj=ii; jj<bs2; jj+=bs) {
530                 baij_a[jj] = *value++;
531               }
532             }
533           }
534         } else {
535           value = v + j*(stepval+bs)*bs + i*bs;
536           if (addv == ADD_VALUES) {
537             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
538               for (jj=0; jj<bs; jj++) {
539                 baij_a[jj] += *value++;
540               }
541             }
542           } else {
543             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
544               for (jj=0; jj<bs; jj++) {
545                 baij_a[jj] = *value++;
546               }
547             }
548           }
549         }
550       }
551     } else {
552       if (!baij->donotstash) {
553         if (roworiented) {
554           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
555         } else {
556           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
557         }
558       }
559     }
560   }
561 #if defined(PETSC_USE_DEBUG)
562   baij->ht_total_ct  = total_ct;
563   baij->ht_insert_ct = insert_ct;
564 #endif
565   PetscFunctionReturn(0);
566 }
567 
568 #undef __FUNCT__
569 #define __FUNCT__ "MatGetValues_MPIBAIJ"
570 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
571 {
572   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
573   PetscErrorCode ierr;
574   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
575   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
576 
577   PetscFunctionBegin;
578   for (i=0; i<m; i++) {
579     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
580     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
581     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
582       row = idxm[i] - bsrstart;
583       for (j=0; j<n; j++) {
584         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
585         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
586         if (idxn[j] >= bscstart && idxn[j] < bscend) {
587           col  = idxn[j] - bscstart;
588           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589         } else {
590           if (!baij->colmap) {
591             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
592           }
593 #if defined(PETSC_USE_CTABLE)
594           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
595           data--;
596 #else
597           data = baij->colmap[idxn[j]/bs]-1;
598 #endif
599           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
600           else {
601             col  = data + idxn[j]%bs;
602             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603           }
604         }
605       }
606     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
607   }
608   PetscFunctionReturn(0);
609 }
610 
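/*
   Usage note (sketch): MatGetValues() on an MPIBAIJ matrix can only return
   entries from locally owned rows, e.g.

       PetscInt    rstart,rend,row,col = 0;
       PetscScalar val;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       row  = rstart;
       ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);

   Requests outside [rstart,rend) fail with the "Only local values currently
   supported" error above; off-diagonal columns that are not stored are
   returned as 0.0.
*/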
611 #undef __FUNCT__
612 #define __FUNCT__ "MatNorm_MPIBAIJ"
613 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
614 {
615   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
616   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
617   PetscErrorCode ierr;
618   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
619   PetscReal      sum = 0.0;
620   MatScalar      *v;
621 
622   PetscFunctionBegin;
623   if (baij->size == 1) {
624     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
625   } else {
626     if (type == NORM_FROBENIUS) {
627       v  = amat->a;
628       nz = amat->nz*bs2;
629       for (i=0; i<nz; i++) {
630         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
631       }
632       v  = bmat->a;
633       nz = bmat->nz*bs2;
634       for (i=0; i<nz; i++) {
635         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
636       }
637       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
638       *nrm = PetscSqrtReal(*nrm);
639     } else if (type == NORM_1) { /* max column sum */
640       PetscReal *tmp,*tmp2;
641       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
642       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
643       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
644       v    = amat->a; jj = amat->j;
645       for (i=0; i<amat->nz; i++) {
646         for (j=0; j<bs; j++) {
647           col = bs*(cstart + *jj) + j; /* column index */
648           for (row=0; row<bs; row++) {
649             tmp[col] += PetscAbsScalar(*v);  v++;
650           }
651         }
652         jj++;
653       }
654       v = bmat->a; jj = bmat->j;
655       for (i=0; i<bmat->nz; i++) {
656         for (j=0; j<bs; j++) {
657           col = bs*garray[*jj] + j;
658           for (row=0; row<bs; row++) {
659             tmp[col] += PetscAbsScalar(*v); v++;
660           }
661         }
662         jj++;
663       }
664       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
665       *nrm = 0.0;
666       for (j=0; j<mat->cmap->N; j++) {
667         if (tmp2[j] > *nrm) *nrm = tmp2[j];
668       }
669       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
670     } else if (type == NORM_INFINITY) { /* max row sum */
671       PetscReal *sums;
672       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
673       sum  = 0.0;
674       for (j=0; j<amat->mbs; j++) {
675         for (row=0; row<bs; row++) sums[row] = 0.0;
676         v  = amat->a + bs2*amat->i[j];
677         nz = amat->i[j+1]-amat->i[j];
678         for (i=0; i<nz; i++) {
679           for (col=0; col<bs; col++) {
680             for (row=0; row<bs; row++) {
681               sums[row] += PetscAbsScalar(*v); v++;
682             }
683           }
684         }
685         v  = bmat->a + bs2*bmat->i[j];
686         nz = bmat->i[j+1]-bmat->i[j];
687         for (i=0; i<nz; i++) {
688           for (col=0; col<bs; col++) {
689             for (row=0; row<bs; row++) {
690               sums[row] += PetscAbsScalar(*v); v++;
691             }
692           }
693         }
694         for (row=0; row<bs; row++) {
695           if (sums[row] > sum) sum = sums[row];
696         }
697       }
698       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
699       ierr = PetscFree(sums);CHKERRQ(ierr);
700     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
701   }
702   PetscFunctionReturn(0);
703 }
704 
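/*
   For reference, the standard definitions computed above for entries a_ij:

       NORM_FROBENIUS : sqrt( sum_ij |a_ij|^2 )    (local sums over A and B, then MPI_SUM)
       NORM_1         : max_j sum_i |a_ij|         (maximum column sum)
       NORM_INFINITY  : max_i sum_j |a_ij|         (maximum row sum)

   The bs x bs block storage only changes how the entries are traversed, not the
   result.
*/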
705 /*
706   Creates the hash table and fills it in.
707   This table is created only once.
708   If new entries need to be added to the matrix
709   then the hash table has to be destroyed and
710   recreated.
711 */
712 #undef __FUNCT__
713 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
714 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
715 {
716   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
717   Mat            A     = baij->A,B=baij->B;
718   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
719   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
720   PetscErrorCode ierr;
721   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
722   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
723   PetscInt       *HT,key;
724   MatScalar      **HD;
725   PetscReal      tmp;
726 #if defined(PETSC_USE_INFO)
727   PetscInt ct=0,max=0;
728 #endif
729 
730   PetscFunctionBegin;
731   if (baij->ht) PetscFunctionReturn(0);
732 
733   baij->ht_size = (PetscInt)(factor*nz);
734   ht_size       = baij->ht_size;
735 
736   /* Allocate Memory for Hash Table */
737   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
738   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
739   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
740   HD   = baij->hd;
741   HT   = baij->ht;
742 
743   /* Loop Over A */
744   for (i=0; i<a->mbs; i++) {
745     for (j=ai[i]; j<ai[i+1]; j++) {
746       row = i+rstart;
747       col = aj[j]+cstart;
748 
749       key = row*Nbs + col + 1;
750       h1  = HASH(ht_size,key,tmp);
751       for (k=0; k<ht_size; k++) {
752         if (!HT[(h1+k)%ht_size]) {
753           HT[(h1+k)%ht_size] = key;
754           HD[(h1+k)%ht_size] = a->a + j*bs2;
755           break;
756 #if defined(PETSC_USE_INFO)
757         } else {
758           ct++;
759 #endif
760         }
761       }
762 #if defined(PETSC_USE_INFO)
763       if (k> max) max = k;
764 #endif
765     }
766   }
767   /* Loop Over B */
768   for (i=0; i<b->mbs; i++) {
769     for (j=bi[i]; j<bi[i+1]; j++) {
770       row = i+rstart;
771       col = garray[bj[j]];
772       key = row*Nbs + col + 1;
773       h1  = HASH(ht_size,key,tmp);
774       for (k=0; k<ht_size; k++) {
775         if (!HT[(h1+k)%ht_size]) {
776           HT[(h1+k)%ht_size] = key;
777           HD[(h1+k)%ht_size] = b->a + j*bs2;
778           break;
779 #if defined(PETSC_USE_INFO)
780         } else {
781           ct++;
782 #endif
783         }
784       }
785 #if defined(PETSC_USE_INFO)
786       if (k> max) max = k;
787 #endif
788     }
789   }
790 
791   /* Print Summary */
792 #if defined(PETSC_USE_INFO)
793   for (i=0,j=0; i<ht_size; i++) {
794     if (HT[i]) j++;
795   }
796   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
797 #endif
798   PetscFunctionReturn(0);
799 }
800 
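/*
   Probing sketch (hypothetical numbers): the table uses open addressing with
   linear probing.  If two keys both hash to slot 61, the first occupies HT[61]
   and the second goes into the next empty slot, HT[62] (modulo ht_size).  The
   _HT MatSetValues routines repeat the same scan, so choosing factor > 1 when
   sizing the table at factor*nz keeps the probe sequences short; the PetscInfo
   summary above reports the average and maximum search lengths observed.
*/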
801 #undef __FUNCT__
802 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
803 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
804 {
805   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
806   PetscErrorCode ierr;
807   PetscInt       nstash,reallocs;
808   InsertMode     addv;
809 
810   PetscFunctionBegin;
811   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
812 
813   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
814   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
815   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
816   mat->insertmode = addv; /* in case this processor had no cache */
817 
818   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
819   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
820   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
821   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
822   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
823   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
829 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
830 {
831   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
832   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
833   PetscErrorCode ierr;
834   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
835   PetscInt       *row,*col;
836   PetscBool      r1,r2,r3,other_disassembled;
837   MatScalar      *val;
838   InsertMode     addv = mat->insertmode;
839   PetscMPIInt    n;
840 
841   PetscFunctionBegin;
842   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
843   if (!baij->donotstash && !mat->nooffprocentries) {
844     while (1) {
845       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
846       if (!flg) break;
847 
848       for (i=0; i<n;) {
849         /* Now identify the consecutive vals belonging to the same row */
850         for (j=i,rstart=row[j]; j<n; j++) {
851           if (row[j] != rstart) break;
852         }
853         if (j < n) ncols = j-i;
854         else       ncols = n-i;
855         /* Now assemble all these values with a single function call */
856         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
857         i    = j;
858       }
859     }
860     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
861     /* Now process the block-stash. Since the values are stashed column-oriented,
862        temporarily clear the roworiented flags, and after MatSetValuesBlocked_MPIBAIJ()
863        restore the original flags */
864     r1 = baij->roworiented;
865     r2 = a->roworiented;
866     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
867 
868     baij->roworiented = PETSC_FALSE;
869     a->roworiented    = PETSC_FALSE;
870 
871     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
872     while (1) {
873       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
874       if (!flg) break;
875 
876       for (i=0; i<n;) {
877         /* Now identify the consecutive vals belonging to the same row */
878         for (j=i,rstart=row[j]; j<n; j++) {
879           if (row[j] != rstart) break;
880         }
881         if (j < n) ncols = j-i;
882         else       ncols = n-i;
883         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
884         i    = j;
885       }
886     }
887     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
888 
889     baij->roworiented = r1;
890     a->roworiented    = r2;
891 
892     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
893   }
894 
895   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
896   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
897 
898   /* determine if any processor has disassembled, if so we must
899      also disassemble ourselves, in order that we may reassemble. */
900   /*
901      if nonzero structure of submatrix B cannot change then we know that
902      no processor disassembled thus we can skip this stuff
903   */
904   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
905     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
906     if (mat->was_assembled && !other_disassembled) {
907       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
908     }
909   }
910 
911   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
912     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
913   }
914   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
915   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
916   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
917 
918 #if defined(PETSC_USE_INFO)
919   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
920     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
921 
922     baij->ht_total_ct  = 0;
923     baij->ht_insert_ct = 0;
924   }
925 #endif
926   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
927     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
928 
929     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
930     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
931   }
932 
933   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
934 
935   baij->rowvalues = 0;
936   PetscFunctionReturn(0);
937 }
938 
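/*
   Sketch (assuming the standard PETSc option names): the hash-table insertion
   path installed above is normally requested before assembly, e.g.

       ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);

   or from the options database (-mat_use_hash_table), which sets baij->ht_flag
   and baij->ht_fact; after the first final assembly the setvalues function
   pointers are switched to the _HT variants defined earlier in this file.
*/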
939 #undef __FUNCT__
940 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
941 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
942 {
943   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
944   PetscErrorCode    ierr;
945   PetscMPIInt       size = baij->size,rank = baij->rank;
946   PetscInt          bs   = mat->rmap->bs;
947   PetscBool         iascii,isdraw;
948   PetscViewer       sviewer;
949   PetscViewerFormat format;
950 
951   PetscFunctionBegin;
952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
953   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
954   if (iascii) {
955     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
956     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
957       MatInfo info;
958       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
959       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
961       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
962                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
963       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
964       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
965       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
966       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
967       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
968       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
969       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
970       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
971       PetscFunctionReturn(0);
972     } else if (format == PETSC_VIEWER_ASCII_INFO) {
973       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
974       PetscFunctionReturn(0);
975     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
976       PetscFunctionReturn(0);
977     }
978   }
979 
980   if (isdraw) {
981     PetscDraw draw;
982     PetscBool isnull;
983     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
984     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
985   }
986 
987   if (size == 1) {
988     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
989     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
990   } else {
991     /* assemble the entire matrix onto first processor. */
992     Mat         A;
993     Mat_SeqBAIJ *Aloc;
994     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
995     MatScalar   *a;
996 
997     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
998     /* Perhaps this should be the type of mat? */
999     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1000     if (!rank) {
1001       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1002     } else {
1003       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1004     }
1005     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1006     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1007     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1008     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1009 
1010     /* copy over the A part */
1011     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1012     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1013     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1014 
1015     for (i=0; i<mbs; i++) {
1016       rvals[0] = bs*(baij->rstartbs + i);
1017       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1018       for (j=ai[i]; j<ai[i+1]; j++) {
1019         col = (baij->cstartbs+aj[j])*bs;
1020         for (k=0; k<bs; k++) {
1021           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1022           col++; a += bs;
1023         }
1024       }
1025     }
1026     /* copy over the B part */
1027     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1028     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1029     for (i=0; i<mbs; i++) {
1030       rvals[0] = bs*(baij->rstartbs + i);
1031       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1032       for (j=ai[i]; j<ai[i+1]; j++) {
1033         col = baij->garray[aj[j]]*bs;
1034         for (k=0; k<bs; k++) {
1035           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1036           col++; a += bs;
1037         }
1038       }
1039     }
1040     ierr = PetscFree(rvals);CHKERRQ(ierr);
1041     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1042     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1043     /*
1044        Every process has to participate in drawing the matrix since the graphics waits are
1045        synchronized across all processors that share the PetscDraw object
1046     */
1047     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1048     if (!rank) {
1049       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1050       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1051       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1052       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1053     }
1054     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1055     ierr = MatDestroy(&A);CHKERRQ(ierr);
1056   }
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1062 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1063 {
1064   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1065   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1066   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1067   PetscErrorCode ierr;
1068   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1069   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1070   int            fd;
1071   PetscScalar    *column_values;
1072   FILE           *file;
1073   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1074   PetscInt       message_count,flowcontrolcount;
1075 
1076   PetscFunctionBegin;
1077   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1078   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1079   nz   = bs2*(A->nz + B->nz);
1080   rlen = mat->rmap->n;
1081   if (!rank) {
1082     header[0] = MAT_FILE_CLASSID;
1083     header[1] = mat->rmap->N;
1084     header[2] = mat->cmap->N;
1085 
1086     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1087     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1088     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1089     /* get largest number of rows any processor has */
1090     range = mat->rmap->range;
1091     for (i=1; i<size; i++) {
1092       rlen = PetscMax(rlen,range[i+1] - range[i]);
1093     }
1094   } else {
1095     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1096   }
1097 
1098   ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1099   /* compute the number of nonzero blocks in each block row  */
1100   for (i=0; i<a->mbs; i++) {
1101     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1102   }
1103   /* store the row lengths to the file */
1104   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1105   if (!rank) {
1106     MPI_Status status;
1107     ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1108     rlen = (range[1] - range[0])/bs;
1109     for (i=0; i<rlen; i++) {
1110       for (j=0; j<bs; j++) {
1111         row_lens[i*bs+j] = bs*crow_lens[i];
1112       }
1113     }
1114     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1115     for (i=1; i<size; i++) {
1116       rlen = (range[i+1] - range[i])/bs;
1117       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1118       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1119       for (k=0; k<rlen; k++) {
1120         for (j=0; j<bs; j++) {
1121           row_lens[k*bs+j] = bs*crow_lens[k];
1122         }
1123       }
1124       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1125     }
1126     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1127     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1128   } else {
1129     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1130     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1131     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1132   }
1133   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1134 
1135   /* load up the local column indices. Indices are stored for every point row, not just once per block row, because process 0
1136      cannot reconstruct the per-row information from a block row alone. This does require more communication, but still not more than
1137      the communication needed for the nonzero values  */
1138   nzmax = nz; /* space the largest processor needs */
1139   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1140   ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1141   cnt   = 0;
1142   for (i=0; i<a->mbs; i++) {
1143     pcnt = cnt;
1144     for (j=B->i[i]; j<B->i[i+1]; j++) {
1145       if ((col = garray[B->j[j]]) > cstart) break;
1146       for (l=0; l<bs; l++) {
1147         column_indices[cnt++] = bs*col+l;
1148       }
1149     }
1150     for (k=A->i[i]; k<A->i[i+1]; k++) {
1151       for (l=0; l<bs; l++) {
1152         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1153       }
1154     }
1155     for (; j<B->i[i+1]; j++) {
1156       for (l=0; l<bs; l++) {
1157         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1158       }
1159     }
1160     len = cnt - pcnt;
1161     for (k=1; k<bs; k++) {
1162       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1163       cnt += len;
1164     }
1165   }
1166   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1167 
1168   /* store the columns to the file */
1169   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1170   if (!rank) {
1171     MPI_Status status;
1172     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1173     for (i=1; i<size; i++) {
1174       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1175       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1176       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1177       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1178     }
1179     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1180   } else {
1181     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1182     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1183     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1184     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1185   }
1186   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1187 
1188   /* load up the numerical values */
1189   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1190   cnt  = 0;
1191   for (i=0; i<a->mbs; i++) {
1192     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1193     for (j=B->i[i]; j<B->i[i+1]; j++) {
1194       if (garray[B->j[j]] > cstart) break;
1195       for (l=0; l<bs; l++) {
1196         for (ll=0; ll<bs; ll++) {
1197           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1198         }
1199       }
1200       cnt += bs;
1201     }
1202     for (k=A->i[i]; k<A->i[i+1]; k++) {
1203       for (l=0; l<bs; l++) {
1204         for (ll=0; ll<bs; ll++) {
1205           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1206         }
1207       }
1208       cnt += bs;
1209     }
1210     for (; j<B->i[i+1]; j++) {
1211       for (l=0; l<bs; l++) {
1212         for (ll=0; ll<bs; ll++) {
1213           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1214         }
1215       }
1216       cnt += bs;
1217     }
1218     cnt += (bs-1)*rlen;
1219   }
1220   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1221 
1222   /* store the column values to the file */
1223   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1224   if (!rank) {
1225     MPI_Status status;
1226     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1227     for (i=1; i<size; i++) {
1228       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1229       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1230       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1231       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1232     }
1233     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1234   } else {
1235     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1236     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1237     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1238     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1239   }
1240   ierr = PetscFree(column_values);CHKERRQ(ierr);
1241 
1242   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1243   if (file) {
1244     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1245   }
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatView_MPIBAIJ"
1251 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1252 {
1253   PetscErrorCode ierr;
1254   PetscBool      iascii,isdraw,issocket,isbinary;
1255 
1256   PetscFunctionBegin;
1257   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1258   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1259   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1260   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1261   if (iascii || isdraw || issocket) {
1262     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1263   } else if (isbinary) {
1264     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1265   }
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1271 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1272 {
1273   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1274   PetscErrorCode ierr;
1275 
1276   PetscFunctionBegin;
1277 #if defined(PETSC_USE_LOG)
1278   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1279 #endif
1280   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1281   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1282   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1283   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1284 #if defined(PETSC_USE_CTABLE)
1285   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1286 #else
1287   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1288 #endif
1289   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1290   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1291   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1292   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1293   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1294   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1295   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatMult_MPIBAIJ"
1313 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1314 {
1315   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1316   PetscErrorCode ierr;
1317   PetscInt       nt;
1318 
1319   PetscFunctionBegin;
1320   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1321   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1322   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1323   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1324   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1326   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1327   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
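/*
   Note on the structure of the multiply above: the product is formed as
        y = A_d * x_local + B_o * x_ghost
   where a->A holds the block-diagonal ("diagonal") part and a->B the
   off-processor coupling.  The VecScatterBegin()/VecScatterEnd() pair
   overlaps the communication of the needed ghost values of xx (into a->lvec)
   with the purely local product a->A * xx.
*/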
1330 
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1333 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1334 {
1335   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1340   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1341   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1342   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1348 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1349 {
1350   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1351   PetscErrorCode ierr;
1352   PetscBool      merged;
1353 
1354   PetscFunctionBegin;
1355   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1356   /* do nondiagonal part */
1357   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1358   if (!merged) {
1359     /* send it on its way */
1360     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1361     /* do local part */
1362     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1363     /* receive remote parts: note this assumes the values are not actually */
1364     /* inserted in yy until the next line */
1365     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366   } else {
1367     /* do local part */
1368     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1369     /* send it on its way */
1370     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1371     /* values actually were received in the Begin() but we need to call this nop */
1372     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1373   }
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1379 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1380 {
1381   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1382   PetscErrorCode ierr;
1383 
1384   PetscFunctionBegin;
1385   /* do nondiagonal part */
1386   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1387   /* send it on its way */
1388   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   /* do local part */
1390   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1391   /* receive remote parts: note this assumes the values are not actually */
1392   /* inserted in zz until the next line, which is true for this implementation */
1393   /* but may not always be true. */
1394   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 /*
1399   This only works correctly for square matrices where the subblock A->A is the
1400    diagonal block
1401 */
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1404 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1405 {
1406   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1407   PetscErrorCode ierr;
1408 
1409   PetscFunctionBegin;
1410   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1411   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatScale_MPIBAIJ"
1417 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1418 {
1419   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1420   PetscErrorCode ierr;
1421 
1422   PetscFunctionBegin;
1423   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1424   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1430 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1431 {
1432   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1433   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1434   PetscErrorCode ierr;
1435   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1436   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1437   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1438 
1439   PetscFunctionBegin;
1440   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1441   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1442   mat->getrowactive = PETSC_TRUE;
1443 
1444   if (!mat->rowvalues && (idx || v)) {
1445     /*
1446         allocate enough space to hold information from the longest row.
1447     */
1448     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1449     PetscInt    max = 1,mbs = mat->mbs,tmp;
1450     for (i=0; i<mbs; i++) {
1451       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1452       if (max < tmp) max = tmp;
1453     }
1454     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1455   }
1456   lrow = row - brstart;
1457 
1458   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1459   if (!v)   {pvA = 0; pvB = 0;}
1460   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1461   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1462   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1463   nztot = nzA + nzB;
1464 
1465   cmap = mat->garray;
1466   if (v  || idx) {
1467     if (nztot) {
1468       /* Sort by increasing column numbers, assuming A and B already sorted */
1469       PetscInt imark = -1;
1470       if (v) {
1471         *v = v_p = mat->rowvalues;
1472         for (i=0; i<nzB; i++) {
1473           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1474           else break;
1475         }
1476         imark = i;
1477         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1478         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1479       }
1480       if (idx) {
1481         *idx = idx_p = mat->rowindices;
1482         if (imark > -1) {
1483           for (i=0; i<imark; i++) {
1484             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1485           }
1486         } else {
1487           for (i=0; i<nzB; i++) {
1488             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1489             else break;
1490           }
1491           imark = i;
1492         }
1493         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1494         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1495       }
1496     } else {
1497       if (idx) *idx = 0;
1498       if (v)   *v   = 0;
1499     }
1500   }
1501   *nz  = nztot;
1502   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1503   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
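/*
   The row returned above is assembled so that its column indices are globally
   increasing: first the entries of the off-diagonal part B whose global block
   column lies before the diagonal block, then all entries of the diagonal
   part A (shifted by cstart*bs), then the remaining entries of B.
*/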
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1509 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1510 {
1511   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1512 
1513   PetscFunctionBegin;
1514   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1515   baij->getrowactive = PETSC_FALSE;
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1521 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1522 {
1523   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1524   PetscErrorCode ierr;
1525 
1526   PetscFunctionBegin;
1527   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1528   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1534 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1535 {
1536   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1537   Mat            A  = a->A,B = a->B;
1538   PetscErrorCode ierr;
1539   PetscReal      isend[5],irecv[5];
1540 
1541   PetscFunctionBegin;
1542   info->block_size = (PetscReal)matin->rmap->bs;
1543 
1544   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1545 
1546   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1547   isend[3] = info->memory;  isend[4] = info->mallocs;
1548 
1549   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1550 
1551   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1552   isend[3] += info->memory;  isend[4] += info->mallocs;
1553 
1554   if (flag == MAT_LOCAL) {
1555     info->nz_used      = isend[0];
1556     info->nz_allocated = isend[1];
1557     info->nz_unneeded  = isend[2];
1558     info->memory       = isend[3];
1559     info->mallocs      = isend[4];
1560   } else if (flag == MAT_GLOBAL_MAX) {
1561     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1562 
1563     info->nz_used      = irecv[0];
1564     info->nz_allocated = irecv[1];
1565     info->nz_unneeded  = irecv[2];
1566     info->memory       = irecv[3];
1567     info->mallocs      = irecv[4];
1568   } else if (flag == MAT_GLOBAL_SUM) {
1569     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1570 
1571     info->nz_used      = irecv[0];
1572     info->nz_allocated = irecv[1];
1573     info->nz_unneeded  = irecv[2];
1574     info->memory       = irecv[3];
1575     info->mallocs      = irecv[4];
1576   } else SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1577   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1578   info->fill_ratio_needed = 0;
1579   info->factor_mallocs    = 0;
1580   PetscFunctionReturn(0);
1581 }
1582 
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1585 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1586 {
1587   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1588   PetscErrorCode ierr;
1589 
1590   PetscFunctionBegin;
1591   switch (op) {
1592   case MAT_NEW_NONZERO_LOCATIONS:
1593   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1594   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1595   case MAT_KEEP_NONZERO_PATTERN:
1596   case MAT_NEW_NONZERO_LOCATION_ERR:
1597     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1598     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1599     break;
1600   case MAT_ROW_ORIENTED:
1601     a->roworiented = flg;
1602 
1603     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1604     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1605     break;
1606   case MAT_NEW_DIAGONALS:
1607     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1608     break;
1609   case MAT_IGNORE_OFF_PROC_ENTRIES:
1610     a->donotstash = flg;
1611     break;
1612   case MAT_USE_HASH_TABLE:
1613     a->ht_flag = flg;
1614     break;
1615   case MAT_SYMMETRIC:
1616   case MAT_STRUCTURALLY_SYMMETRIC:
1617   case MAT_HERMITIAN:
1618   case MAT_SYMMETRY_ETERNAL:
1619     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1620     break;
1621   default:
1622     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1623   }
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 #undef __FUNCT__
1628 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1629 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1630 {
1631   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1632   Mat_SeqBAIJ    *Aloc;
1633   Mat            B;
1634   PetscErrorCode ierr;
1635   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1636   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1637   MatScalar      *a;
1638 
1639   PetscFunctionBegin;
1640   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1641   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1642     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1643     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1644     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1645     /* Do not know preallocation information, but must set block size */
1646     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1647   } else {
1648     B = *matout;
1649   }
1650 
1651   /* copy over the A part */
1652   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1653   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1654   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1655 
1656   for (i=0; i<mbs; i++) {
1657     rvals[0] = bs*(baij->rstartbs + i);
1658     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1659     for (j=ai[i]; j<ai[i+1]; j++) {
1660       col = (baij->cstartbs+aj[j])*bs;
1661       for (k=0; k<bs; k++) {
1662         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1663 
1664         col++; a += bs;
1665       }
1666     }
1667   }
1668   /* copy over the B part */
1669   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1670   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1671   for (i=0; i<mbs; i++) {
1672     rvals[0] = bs*(baij->rstartbs + i);
1673     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1674     for (j=ai[i]; j<ai[i+1]; j++) {
1675       col = baij->garray[aj[j]]*bs;
1676       for (k=0; k<bs; k++) {
1677         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1678         col++;
1679         a += bs;
1680       }
1681     }
1682   }
1683   ierr = PetscFree(rvals);CHKERRQ(ierr);
1684   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686 
1687   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1688   else {
1689     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1690   }
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1696 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1697 {
1698   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1699   Mat            a     = baij->A,b = baij->B;
1700   PetscErrorCode ierr;
1701   PetscInt       s1,s2,s3;
1702 
1703   PetscFunctionBegin;
1704   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1705   if (rr) {
1706     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1707     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1708     /* Overlap communication with computation. */
1709     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1710   }
1711   if (ll) {
1712     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1713     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1714     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1715   }
1716   /* scale the diagonal block */
1717   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1718 
1719   if (rr) {
1720     /* Do a scatter end and then right scale the off-diagonal block */
1721     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1722     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1723   }
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1730 {
1731   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1732   PetscErrorCode    ierr;
1733   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1734   PetscInt          i,*owners = A->rmap->range;
1735   PetscInt          *nprocs,j,idx,nsends,row;
1736   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1737   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1738   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1739   MPI_Comm          comm = ((PetscObject)A)->comm;
1740   MPI_Request       *send_waits,*recv_waits;
1741   MPI_Status        recv_status,*send_status;
1742   const PetscScalar *xx;
1743   PetscScalar       *bb;
1744 #if defined(PETSC_DEBUG)
1745   PetscBool found = PETSC_FALSE;
1746 #endif
1747 
1748   PetscFunctionBegin;
1749   /*  first count number of contributors to each processor */
1750   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1751   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1752   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
1753   j    = 0;
1754   for (i=0; i<N; i++) {
1755     if (lastidx > (idx = rows[i])) j = 0;
1756     lastidx = idx;
1757     for (; j<size; j++) {
1758       if (idx >= owners[j] && idx < owners[j+1]) {
1759         nprocs[2*j]++;
1760         nprocs[2*j+1] = 1;
1761         owner[i]      = j;
1762 #if defined(PETSC_DEBUG)
1763         found = PETSC_TRUE;
1764 #endif
1765         break;
1766       }
1767     }
1768 #if defined(PETSC_DEBUG)
1769     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1770     found = PETSC_FALSE;
1771 #endif
1772   }
1773   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
1774 
1775   if (A->nooffproczerorows) {
1776     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1777     nrecvs = nsends;
1778     nmax   = N;
1779   } else {
1780     /* inform other processors of number of messages and max length*/
1781     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1782   }
1783 
1784   /* post receives:   */
1785   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1786   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1787   for (i=0; i<nrecvs; i++) {
1788     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1789   }
1790 
1791   /* do sends:
1792      1) starts[i] gives the starting index in svalues for stuff going to
1793      the ith processor
1794   */
1795   ierr      = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1796   ierr      = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1797   ierr      = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1798   starts[0] = 0;
1799   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1800   for (i=0; i<N; i++) {
1801     svalues[starts[owner[i]]++] = rows[i];
1802   }
1803 
1804   starts[0] = 0;
1805   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1806   count = 0;
1807   for (i=0; i<size; i++) {
1808     if (nprocs[2*i+1]) {
1809       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1810     }
1811   }
1812   ierr = PetscFree(starts);CHKERRQ(ierr);
1813 
1814   base = owners[rank];
1815 
1816   /*  wait on receives */
1817   ierr  = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1818   count = nrecvs;
1819   slen  = 0;
1820   while (count) {
1821     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1822     /* unpack receives into our local space */
1823     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1824 
1825     source[imdex] = recv_status.MPI_SOURCE;
1826     lens[imdex]   = n;
1827     slen         += n;
1828     count--;
1829   }
1830   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1831 
1832   /* move the data into the send scatter */
1833   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1834   count = 0;
1835   for (i=0; i<nrecvs; i++) {
1836     values = rvalues + i*nmax;
1837     for (j=0; j<lens[i]; j++) {
1838       lrows[count++] = values[j] - base;
1839     }
1840   }
1841   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1842   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1843   ierr = PetscFree(owner);CHKERRQ(ierr);
1844   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1845 
1846   /* fix right hand side if needed */
1847   if (x && b) {
1848     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1849     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1850     for (i=0; i<slen; i++) {
1851       bb[lrows[i]] = diag*xx[lrows[i]];
1852     }
1853     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1854     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1855   }
1856 
1857   /* actually zap the local rows */
1858   /*
1859         Zero the required rows. If the "diagonal block" of the matrix
1860      is square and the user wishes to set the diagonal we use separate
1861      code so that MatSetValues() is not called for each diagonal entry, which would
1862      allocate new memory, triggering many mallocs and slowing things down.
1863 
1864   */
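  /*
     Sketch of the three cases handled below:
       diag != 0 and l->A square : zero the rows of l->A and place diag on the diagonal in one pass
       diag != 0, l->A not square: zero the rows, then insert diag row by row with MatSetValues()
       diag == 0                 : simply zero the rows of l->A (l->B is zeroed in all cases)
  */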
1865   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1866   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1867   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1868     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1869   } else if (diag != 0.0) {
1870     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1871     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1872        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1873     for (i=0; i<slen; i++) {
1874       row  = lrows[i] + rstart_bs;
1875       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1876     }
1877     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1878     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1879   } else {
1880     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1881   }
1882 
1883   ierr = PetscFree(lrows);CHKERRQ(ierr);
1884 
1885   /* wait on sends */
1886   if (nsends) {
1887     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1888     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1889     ierr = PetscFree(send_status);CHKERRQ(ierr);
1890   }
1891   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1892   ierr = PetscFree(svalues);CHKERRQ(ierr);
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1898 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1899 {
1900   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1901   PetscErrorCode ierr;
1902 
1903   PetscFunctionBegin;
1904   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatEqual_MPIBAIJ"
1912 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1913 {
1914   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1915   Mat            a,b,c,d;
1916   PetscBool      flg;
1917   PetscErrorCode ierr;
1918 
1919   PetscFunctionBegin;
1920   a = matA->A; b = matA->B;
1921   c = matB->A; d = matB->B;
1922 
1923   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1924   if (flg) {
1925     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1926   }
1927   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatCopy_MPIBAIJ"
1933 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1934 {
1935   PetscErrorCode ierr;
1936   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1937   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1938 
1939   PetscFunctionBegin;
1940   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1941   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1942     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1943   } else {
1944     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1945     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1946   }
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1952 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1953 {
1954   PetscErrorCode ierr;
1955 
1956   PetscFunctionBegin;
1957   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
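/*
   MatSetUp() above falls back to the default preallocation.  For performance a
   user would normally preallocate explicitly; a minimal sketch (the variable
   names and the per-block-row estimates d_nz and o_nz are hypothetical) is:

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);

   where d_nz and o_nz bound the block nonzeros per block row in the diagonal
   and off-diagonal parts respectively.
*/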
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1963 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1964 {
1965   PetscErrorCode ierr;
1966   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1967   PetscBLASInt   bnz,one=1;
1968   Mat_SeqBAIJ    *x,*y;
1969 
1970   PetscFunctionBegin;
1971   if (str == SAME_NONZERO_PATTERN) {
1972     PetscScalar alpha = a;
1973     x    = (Mat_SeqBAIJ*)xx->A->data;
1974     y    = (Mat_SeqBAIJ*)yy->A->data;
1975     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1976     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1977     x    = (Mat_SeqBAIJ*)xx->B->data;
1978     y    = (Mat_SeqBAIJ*)yy->B->data;
1979     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1980     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1981   } else {
1982     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1983   }
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1989 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1990 {
1991   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1996   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2002 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2003 {
2004   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2005   PetscErrorCode ierr;
2006 
2007   PetscFunctionBegin;
2008   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2009   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2015 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2016 {
2017   PetscErrorCode ierr;
2018   IS             iscol_local;
2019   PetscInt       csize;
2020 
2021   PetscFunctionBegin;
2022   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2023   if (call == MAT_REUSE_MATRIX) {
2024     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2025     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2026   } else {
2027     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2028   }
2029   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2030   if (call == MAT_INITIAL_MATRIX) {
2031     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2032     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2033   }
2034   PetscFunctionReturn(0);
2035 }
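/*
   Illustrative use of the extraction above (variable names are hypothetical);
   the index sets must select whole blocks, e.g. the locally owned rows:

      IS  isrow,iscol;
      Mat sub;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/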
2036 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2039 /*
2040   Not great since it makes two copies of the submatrix: first a SeqBAIJ
2041   locally, and then the final result by concatenating the local matrices.
2042   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2043 */
2044 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2045 {
2046   PetscErrorCode ierr;
2047   PetscMPIInt    rank,size;
2048   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2049   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2050   Mat            M,Mreuse;
2051   MatScalar      *vwork,*aa;
2052   MPI_Comm       comm = ((PetscObject)mat)->comm;
2053   IS             isrow_new, iscol_new;
2054   PetscBool      idflag,allrows, allcols;
2055   Mat_SeqBAIJ    *aij;
2056 
2057   PetscFunctionBegin;
2058   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2059   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2060   /* The compression and expansion should be avoided. They do not report
2061      errors and might change the indices, hence they are buggy */
2062   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2063   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2064 
2065   /* Check for special case: each processor gets entire matrix columns */
2066   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2067   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2068   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2069   else allcols = PETSC_FALSE;
2070 
2071   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2072   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2073   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2074   else allrows = PETSC_FALSE;
2075 
2076   if (call ==  MAT_REUSE_MATRIX) {
2077     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2078     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2079     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2080   } else {
2081     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2082   }
2083   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2084   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2085   /*
2086       m - number of local rows
2087       n - number of columns (same on all processors)
2088       rstart - first row in new global matrix generated
2089   */
2090   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2091   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2092   m    = m/bs;
2093   n    = n/bs;
2094 
2095   if (call == MAT_INITIAL_MATRIX) {
2096     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2097     ii  = aij->i;
2098     jj  = aij->j;
2099 
2100     /*
2101         Determine the number of non-zeros in the diagonal and off-diagonal
2102         portions of the matrix in order to do correct preallocation
2103     */
2104 
2105     /* first get start and end of "diagonal" columns */
2106     if (csize == PETSC_DECIDE) {
2107       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2108       if (mglobal == n*bs) { /* square matrix */
2109         nlocal = m;
2110       } else {
2111         nlocal = n/size + ((n % size) > rank);
2112       }
2113     } else {
2114       nlocal = csize/bs;
2115     }
2116     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2117     rstart = rend - nlocal;
2118     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2119 
2120     /* next, compute all the lengths */
2121     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2122     olens = dlens + m;
2123     for (i=0; i<m; i++) {
2124       jend = ii[i+1] - ii[i];
2125       olen = 0;
2126       dlen = 0;
2127       for (j=0; j<jend; j++) {
2128         if (*jj < rstart || *jj >= rend) olen++;
2129         else dlen++;
2130         jj++;
2131       }
2132       olens[i] = olen;
2133       dlens[i] = dlen;
2134     }
2135     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2136     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2137     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2138     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2139     ierr = PetscFree(dlens);CHKERRQ(ierr);
2140   } else {
2141     PetscInt ml,nl;
2142 
2143     M    = *newmat;
2144     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2145     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2146     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2147     /*
2148          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2149        rather than the slower MatSetValues().
2150     */
2151     M->was_assembled = PETSC_TRUE;
2152     M->assembled     = PETSC_FALSE;
2153   }
2154   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2155   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2156   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2157   ii   = aij->i;
2158   jj   = aij->j;
2159   aa   = aij->a;
2160   for (i=0; i<m; i++) {
2161     row   = rstart/bs + i;
2162     nz    = ii[i+1] - ii[i];
2163     cwork = jj;     jj += nz;
2164     vwork = aa;     aa += nz*bs*bs;
2165     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2166   }
2167 
2168   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2169   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2170   *newmat = M;
2171 
2172   /* save submatrix used in processor for next request */
2173   if (call ==  MAT_INITIAL_MATRIX) {
2174     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2175     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2176   }
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatPermute_MPIBAIJ"
2182 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2183 {
2184   MPI_Comm       comm,pcomm;
2185   PetscInt       first,rlocal_size,clocal_size,nrows;
2186   const PetscInt *rows;
2187   PetscMPIInt    size;
2188   IS             crowp,growp,irowp,lrowp,lcolp;
2189   PetscErrorCode ierr;
2190 
2191   PetscFunctionBegin;
2192   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2193   /* make a collective version of 'rowp' */
2194   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2195   if (pcomm==comm) {
2196     crowp = rowp;
2197   } else {
2198     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2199     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2200     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2201     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2202   }
2203   /* collect the global row permutation and invert it */
2204   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2205   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2206   if (pcomm!=comm) {
2207     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2208   }
2209   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2210   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2211   /* get the local target indices */
2212   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2213   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2214   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2215   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2216   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2217   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2218   /* the column permutation is much easier;
2219      just make a local (sequential) version of 'colp' */
2220   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2221   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2222   if (size==1) {
2223     lcolp = colp;
2224   } else {
2225     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2226   }
2227   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2228   /* now we just get the submatrix */
2229   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2230   if (size>1) {
2231     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2232   }
2233   /* clean up */
2234   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2240 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2241 {
2242   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2243   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2244 
2245   PetscFunctionBegin;
2246   if (nghosts) *nghosts = B->nbs;
2247   if (ghosts) *ghosts = baij->garray;
2248   PetscFunctionReturn(0);
2249 }
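/*
   For the BAIJ format baij->garray[] lists, in increasing order, the global
   block column indices of the off-processor ("ghost") block columns actually
   referenced by this process, and B->nbs is their count; these are exactly
   the values returned above.
*/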
2250 
2251 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2255 /*
2256     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2257 */
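/*
   Sketch of what the routine builds for each color i:
     c->ncolumns[i], c->columns[i]   - the (block) columns of color i owned by this process
     c->nrows[i],    c->rows[i]      - the local (block) rows touched by any column of that color
     c->columnsforrow[i][]           - for each such row, the column of this color responsible for it
   These structures are later used by MatFDColoringApply() to perturb all
   columns of one color at once and scatter the resulting function differences
   into the correct matrix entries.
*/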
2258 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2259 {
2260   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2261   PetscErrorCode         ierr;
2262   PetscMPIInt            size,*ncolsonproc,*disp,nn;
2263   PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
2264   const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj;
2265   PetscInt               nis = iscoloring->n,nctot,*cols;
2266   PetscInt               *rowhit,M,cstart,cend,colb;
2267   PetscInt               *columnsforrow,l;
2268   IS                     *isa;
2269   PetscBool              done,flg;
2270   ISLocalToGlobalMapping map   = mat->cmap->bmapping;
2271   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL),ctype=c->ctype;
2272 
2273   PetscFunctionBegin;
2274   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2275   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2276 
2277   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2278   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2279 
2280   M         = mat->rmap->n/bs;
2281   cstart    = mat->cmap->rstart/bs;
2282   cend      = mat->cmap->rend/bs;
2283   c->M      = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
2284   c->N      = mat->cmap->N/bs;
2285   c->m      = mat->rmap->n/bs;
2286   c->rstart = mat->rmap->rstart/bs;
2287 
2288   c->ncolors = nis;
2289   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2290   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2291   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2292   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2293   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2294   ierr       = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2295 
2296   /* Allow access to data structures of local part of matrix */
2297   if (!baij->colmap) {
2298     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2299   }
2300   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2301   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2302 
2303   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2304   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2305 
2306   for (i=0; i<nis; i++) {
2307     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2308     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2309 
2310     c->ncolumns[i] = n;
2311     if (n) {
2312       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2313       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2314       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2315     } else {
2316       c->columns[i] = 0;
2317     }
2318 
2319     if (ctype == IS_COLORING_GLOBAL) {
2320       /* Determine the total (parallel) number of columns of this color */
2321       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2322       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2323 
2324       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
2325       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2326       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
2327       if (!nctot) {
2328         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2329       }
2330 
2331       disp[0] = 0;
2332       for (j=1; j<size; j++) {
2333         disp[j] = disp[j-1] + ncolsonproc[j-1];
2334       }
2335 
2336       /* Get complete list of columns for color on each processor */
2337       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2338       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2339       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2340     } else if (ctype == IS_COLORING_GHOSTED) {
2341       /* Determine local number of columns of this color on this process, including ghost points */
2342       nctot = n;
2343       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2344       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2345     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2346 
2347     /*
2348        Mark all rows affected by these columns
2349     */
2350     /* Temporary option to allow for debugging/testing */
2351     flg  = PETSC_FALSE;
2352     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2353     if (!flg) { /*-----------------------------------------------------------------------------*/
2354       /* crude, fast version */
2355       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2356       /* loop over columns*/
2357       for (j=0; j<nctot; j++) {
2358         if (ctype == IS_COLORING_GHOSTED) {
2359           col = ltog[cols[j]];
2360         } else {
2361           col = cols[j];
2362         }
2363         if (col >= cstart && col < cend) {
2364           /* column is in diagonal block of matrix */
2365           rows = A_cj + A_ci[col-cstart];
2366           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2367         } else {
2368 #if defined(PETSC_USE_CTABLE)
2369           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2370           colb--;
2371 #else
2372           colb = baij->colmap[col] - 1;
2373 #endif
2374           if (colb == -1) {
2375             m = 0;
2376           } else {
2377             colb = colb/bs;
2378             rows = B_cj + B_ci[colb];
2379             m    = B_ci[colb+1] - B_ci[colb];
2380           }
2381         }
2382         /* loop over columns marking them in rowhit */
2383         for (k=0; k<m; k++) {
2384           rowhit[*rows++] = col + 1;
2385         }
2386       }
2387 
2388       /* count the number of hits */
2389       nrows = 0;
2390       for (j=0; j<M; j++) {
2391         if (rowhit[j]) nrows++;
2392       }
2393       c->nrows[i] = nrows;
2394       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2395       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2396       ierr        = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2397       nrows       = 0;
2398       for (j=0; j<M; j++) {
2399         if (rowhit[j]) {
2400           c->rows[i][nrows]          = j;
2401           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2402           nrows++;
2403         }
2404       }
2405     } else { /*-------------------------------------------------------------------------------*/
2406       /* slow version, using rowhit as a linked list */
2407       PetscInt currentcol,fm,mfm;
2408       rowhit[M] = M;
2409       nrows     = 0;
2410       /* loop over columns*/
2411       for (j=0; j<nctot; j++) {
2412         if (ctype == IS_COLORING_GHOSTED) {
2413           col = ltog[cols[j]];
2414         } else {
2415           col = cols[j];
2416         }
2417         if (col >= cstart && col < cend) {
2418           /* column is in diagonal block of matrix */
2419           rows = A_cj + A_ci[col-cstart];
2420           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2421         } else {
2422 #if defined(PETSC_USE_CTABLE)
2423           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2424           colb--;
2425 #else
2426           colb = baij->colmap[col] - 1;
2427 #endif
2428           if (colb == -1) {
2429             m = 0;
2430           } else {
2431             colb = colb/bs;
2432             rows = B_cj + B_ci[colb];
2433             m    = B_ci[colb+1] - B_ci[colb];
2434           }
2435         }
2436 
2437         /* loop over columns marking them in rowhit */
2438         fm = M;    /* fm points to first entry in linked list */
2439         for (k=0; k<m; k++) {
2440           currentcol = *rows++;
2441           /* is it already in the list? */
2442           do {
2443             mfm = fm;
2444             fm  = rowhit[fm];
2445           } while (fm < currentcol);
2446           /* not in list so add it */
2447           if (fm != currentcol) {
2448             nrows++;
2449             columnsforrow[currentcol] = col;
2450             /* next three lines insert new entry into linked list */
2451             rowhit[mfm]        = currentcol;
2452             rowhit[currentcol] = fm;
2453             fm                 = currentcol;
2454             /* fm points to present position in list since we know the columns are sorted */
2455           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2456         }
2457       }
2458       c->nrows[i] = nrows;
2459       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2460       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2461       ierr        = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2462       /* now store the linked list of rows into c->rows[i] */
2463       nrows = 0;
2464       fm    = rowhit[M];
2465       do {
2466         c->rows[i][nrows]            = fm;
2467         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2468         fm                           = rowhit[fm];
2469       } while (fm < M);
2470     } /* ---------------------------------------------------------------------------------------*/
2471     ierr = PetscFree(cols);CHKERRQ(ierr);
2472   }
2473 
2474   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2475   /*
2476        vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
2477   */
2478   if (ctype == IS_COLORING_GLOBAL) {
2479     PetscInt *garray;
2480     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2481     for (i=0; i<baij->B->cmap->n/bs; i++) {
2482       for (j=0; j<bs; j++) {
2483         garray[i*bs+j] = bs*baij->garray[i]+j;
2484       }
2485     }
2486     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2487     ierr = PetscFree(garray);CHKERRQ(ierr);
2488     CHKMEMQ;
2489     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2490     for (k=0; k<c->ncolors; k++) {
2491       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2492       for (l=0; l<c->nrows[k]; l++) {
2493         col = c->columnsforrow[k][l];
2494         if (col >= cstart && col < cend) {
2495           /* column is in diagonal block of matrix */
2496           colb = col - cstart;
2497         } else {
2498           /* column  is in "off-processor" part */
2499 #if defined(PETSC_USE_CTABLE)
2500           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2501           colb--;
2502 #else
2503           colb = baij->colmap[col] - 1;
2504 #endif
2505           colb  = colb/bs;
2506           colb += cend - cstart;
2507         }
2508         c->vscaleforrow[k][l] = colb;
2509       }
2510     }
2511   } else if (ctype == IS_COLORING_GHOSTED) {
2512     /* Get gtol mapping */
2513     PetscInt N = mat->cmap->N, *gtol;
2514     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2515     for (i=0; i<N; i++) gtol[i] = -1;
2516     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2517 
2518     c->vscale = 0; /* will be created in MatFDColoringApply() */
2519     ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2520     for (k=0; k<c->ncolors; k++) {
2521       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2522       for (l=0; l<c->nrows[k]; l++) {
2523         col = c->columnsforrow[k][l];      /* global column index */
2524 
2525         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2526       }
2527     }
2528     ierr = PetscFree(gtol);CHKERRQ(ierr);
2529   }
2530   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2531 
2532   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2533   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2534   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2535   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2536   CHKMEMQ;
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2542 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2543 {
2544   Mat            B;
2545   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2546   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2547   Mat_SeqAIJ     *b;
2548   PetscErrorCode ierr;
2549   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2550   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2551   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2552 
2553   PetscFunctionBegin;
2554   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2555   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2556 
2557   /* ----------------------------------------------------------------
2558      Tell every processor the number of nonzeros per row
2559   */
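  /* Each process fills only its own slice of lens[]; the MPI_Allgatherv() below
     then replicates the complete array of per-block-row lengths on every process. */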
2560   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2561   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2562     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2563   }
2564   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2565   ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2566   displs    = recvcounts + size;
2567   for (i=0; i<size; i++) {
2568     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2569     displs[i]     = A->rmap->range[i]/bs;
2570   }
2571 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2572   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2573 #else
2574   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2575 #endif
2576   /* ---------------------------------------------------------------
2577      Create the sequential matrix of the same type as the local block diagonal
2578   */
2579   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2580   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2581   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2582   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2583   b    = (Mat_SeqAIJ*)B->data;
2584 
2585   /*--------------------------------------------------------------------
2586     Copy my part of matrix column indices over
2587   */
2588   sendcount  = ad->nz + bd->nz;
2589   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2590   a_jsendbuf = ad->j;
2591   b_jsendbuf = bd->j;
2592   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2593   cnt        = 0;
2594   for (i=0; i<n; i++) {
2595 
2596     /* put in lower diagonal portion */
2597     m = bd->i[i+1] - bd->i[i];
2598     while (m > 0) {
2599       /* is it above diagonal (in bd (compressed) numbering) */
2600       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2601       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2602       m--;
2603     }
2604 
2605     /* put in diagonal portion */
2606     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2607       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2608     }
2609 
2610     /* put in upper diagonal portion */
2611     while (m-- > 0) {
2612       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2613     }
2614   }
2615   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2616 
2617   /*--------------------------------------------------------------------
2618     Gather all column indices to all processors
2619   */
2620   for (i=0; i<size; i++) {
2621     recvcounts[i] = 0;
2622     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2623       recvcounts[i] += lens[j];
2624     }
2625   }
2626   displs[0] = 0;
2627   for (i=1; i<size; i++) {
2628     displs[i] = displs[i-1] + recvcounts[i-1];
2629   }
2630 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2631   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2632 #else
2633   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2634 #endif
2635   /*--------------------------------------------------------------------
2636     Assemble the matrix into usable form (note the numerical values are not yet set)
2637   */
2638   /* set the b->ilen (length of each row) values */
2639   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2640   /* set the b->i indices */
2641   b->i[0] = 0;
2642   for (i=1; i<=A->rmap->N/bs; i++) {
2643     b->i[i] = b->i[i-1] + lens[i-1];
2644   }
2645   ierr = PetscFree(lens);CHKERRQ(ierr);
2646   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2647   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2648   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2649 
2650   if (A->symmetric) {
2651     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2652   } else if (A->hermitian) {
2653     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2654   } else if (A->structurally_symmetric) {
2655     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2656   }
2657   *newmat = B;
2658   PetscFunctionReturn(0);
2659 }
2660 
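/*
    MatSOR_MPIBAIJ - runs SOR/relaxation using only the local diagonal block: each outer iteration
  scatters the current solution, moves the off-process coupling to the right-hand side
  (bb1 = bb - B*x) and then applies a local sweep with the diagonal block A.  Truly parallel
  (non-local) sweep variants are not supported and generate an error.
*/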
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatSOR_MPIBAIJ"
2663 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2664 {
2665   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2666   PetscErrorCode ierr;
2667   Vec            bb1 = 0;
2668 
2669   PetscFunctionBegin;
2670   if (flag == SOR_APPLY_UPPER) {
2671     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2672     PetscFunctionReturn(0);
2673   }
2674 
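  /* a work right-hand side bb1 is needed only when more than one outer iteration is run
     or the initial guess is nonzero */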
2675   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2676     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2677   }
2678 
2679   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2680     if (flag & SOR_ZERO_INITIAL_GUESS) {
2681       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2682       its--;
2683     }
2684 
2685     while (its--) {
2686       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2688 
2689       /* update rhs: bb1 = bb - B*x */
2690       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2691       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2692 
2693       /* local sweep */
2694       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2695     }
2696   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2697     if (flag & SOR_ZERO_INITIAL_GUESS) {
2698       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2699       its--;
2700     }
2701     while (its--) {
2702       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2703       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2704 
2705       /* update rhs: bb1 = bb - B*x */
2706       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2707       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2708 
2709       /* local sweep */
2710       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2711     }
2712   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2713     if (flag & SOR_ZERO_INITIAL_GUESS) {
2714       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2715       its--;
2716     }
2717     while (its--) {
2718       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2719       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720 
2721       /* update rhs: bb1 = bb - B*x */
2722       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2723       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2724 
2725       /* local sweep */
2726       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2727     }
2728   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR variant is not supported");
2729 
2730   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2738 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2739 {
2740   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 
2749 /* -------------------------------------------------------------------*/
2750 static struct _MatOps MatOps_Values = {
2751        MatSetValues_MPIBAIJ,
2752        MatGetRow_MPIBAIJ,
2753        MatRestoreRow_MPIBAIJ,
2754        MatMult_MPIBAIJ,
2755 /* 4*/ MatMultAdd_MPIBAIJ,
2756        MatMultTranspose_MPIBAIJ,
2757        MatMultTransposeAdd_MPIBAIJ,
2758        0,
2759        0,
2760        0,
2761 /*10*/ 0,
2762        0,
2763        0,
2764        MatSOR_MPIBAIJ,
2765        MatTranspose_MPIBAIJ,
2766 /*15*/ MatGetInfo_MPIBAIJ,
2767        MatEqual_MPIBAIJ,
2768        MatGetDiagonal_MPIBAIJ,
2769        MatDiagonalScale_MPIBAIJ,
2770        MatNorm_MPIBAIJ,
2771 /*20*/ MatAssemblyBegin_MPIBAIJ,
2772        MatAssemblyEnd_MPIBAIJ,
2773        MatSetOption_MPIBAIJ,
2774        MatZeroEntries_MPIBAIJ,
2775 /*24*/ MatZeroRows_MPIBAIJ,
2776        0,
2777        0,
2778        0,
2779        0,
2780 /*29*/ MatSetUp_MPIBAIJ,
2781        0,
2782        0,
2783        0,
2784        0,
2785 /*34*/ MatDuplicate_MPIBAIJ,
2786        0,
2787        0,
2788        0,
2789        0,
2790 /*39*/ MatAXPY_MPIBAIJ,
2791        MatGetSubMatrices_MPIBAIJ,
2792        MatIncreaseOverlap_MPIBAIJ,
2793        MatGetValues_MPIBAIJ,
2794        MatCopy_MPIBAIJ,
2795 /*44*/ 0,
2796        MatScale_MPIBAIJ,
2797        0,
2798        0,
2799        0,
2800 /*49*/ 0,
2801        0,
2802        0,
2803        0,
2804        0,
2805 /*54*/ MatFDColoringCreate_MPIBAIJ,
2806        0,
2807        MatSetUnfactored_MPIBAIJ,
2808        MatPermute_MPIBAIJ,
2809        MatSetValuesBlocked_MPIBAIJ,
2810 /*59*/ MatGetSubMatrix_MPIBAIJ,
2811        MatDestroy_MPIBAIJ,
2812        MatView_MPIBAIJ,
2813        0,
2814        0,
2815 /*64*/ 0,
2816        0,
2817        0,
2818        0,
2819        0,
2820 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2821        0,
2822        0,
2823        0,
2824        0,
2825 /*74*/ 0,
2826        MatFDColoringApply_BAIJ,
2827        0,
2828        0,
2829        0,
2830 /*79*/ 0,
2831        0,
2832        0,
2833        0,
2834        MatLoad_MPIBAIJ,
2835 /*84*/ 0,
2836        0,
2837        0,
2838        0,
2839        0,
2840 /*89*/ 0,
2841        0,
2842        0,
2843        0,
2844        0,
2845 /*94*/ 0,
2846        0,
2847        0,
2848        0,
2849        0,
2850 /*99*/ 0,
2851        0,
2852        0,
2853        0,
2854        0,
2855 /*104*/0,
2856        MatRealPart_MPIBAIJ,
2857        MatImaginaryPart_MPIBAIJ,
2858        0,
2859        0,
2860 /*109*/0,
2861        0,
2862        0,
2863        0,
2864        0,
2865 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2866        0,
2867        MatGetGhosts_MPIBAIJ,
2868        0,
2869        0,
2870 /*119*/0,
2871        0,
2872        0,
2873        0,
2874        0,
2875 /*124*/0,
2876        0,
2877        MatInvertBlockDiagonal_MPIBAIJ
2878 };
2879 
2880 EXTERN_C_BEGIN
2881 #undef __FUNCT__
2882 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2883 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2884 {
2885   PetscFunctionBegin;
2886   *a = ((Mat_MPIBAIJ*)A->data)->A;
2887   PetscFunctionReturn(0);
2888 }
2889 EXTERN_C_END
2890 
2891 EXTERN_C_BEGIN
2892 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2893 EXTERN_C_END
2894 
2895 EXTERN_C_BEGIN
2896 #undef __FUNCT__
2897 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2898 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2899 {
2900   PetscInt       m,rstart,cstart,cend;
2901   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2902   const PetscInt *JJ    =0;
2903   PetscScalar    *values=0;
2904   PetscErrorCode ierr;
2905 
2906   PetscFunctionBegin;
2907   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2908   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2909   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2910   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2911   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2912   m      = B->rmap->n/bs;
2913   rstart = B->rmap->rstart/bs;
2914   cstart = B->cmap->rstart/bs;
2915   cend   = B->cmap->rend/bs;
2916 
2917   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2918   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2919   for (i=0; i<m; i++) {
2920     nz = ii[i+1] - ii[i];
2921     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2922     nz_max = PetscMax(nz_max,nz);
2923     JJ     = jj + ii[i];
2924     for (j=0; j<nz; j++) {
2925       if (*JJ >= cstart) break;
2926       JJ++;
2927     }
2928     d = 0;
2929     for (; j<nz; j++) {
2930       if (*JJ++ >= cend) break;
2931       d++;
2932     }
2933     d_nnz[i] = d;
2934     o_nnz[i] = nz - d;
2935   }
2936   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2937   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2938 
2939   values = (PetscScalar*)V;
2940   if (!values) {
2941     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2942     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2943   }
2944   for (i=0; i<m; i++) {
2945     PetscInt          row    = i + rstart;
2946     PetscInt          ncols  = ii[i+1] - ii[i];
2947     const PetscInt    *icols = jj + ii[i];
2948     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2949     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2950   }
2951 
2952   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2953   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2954   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2955   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2956   PetscFunctionReturn(0);
2957 }
2958 EXTERN_C_END
2959 
2960 #undef __FUNCT__
2961 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2962 /*@C
2963    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2964    (block compressed row).
2965 
2966    Collective on MPI_Comm
2967 
2968    Input Parameters:
2969 +  A - the matrix
2970 .  bs - the block size
2971 .  i - the indices into j for the start of each local row (starts with zero)
2972 .  j - the column indices for each local row (starts with zero); these must be sorted within each row (see the example below)
2973 -  v - optional values in the matrix
2974 
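   Example (a minimal sketch; A is assumed to be a MATMPIBAIJ matrix created beforehand, run on a
   single process with bs = 2 and two local block rows whose block columns are {0,1} and {1}):
.vb
   PetscInt i[] = {0,2,3};   /* i[r] is the start of block row r within j[] */
   PetscInt j[] = {0,1,1};   /* sorted global block column indices */
   ierr = MatMPIBAIJSetPreallocationCSR(A,2,i,j,PETSC_NULL);CHKERRQ(ierr);  /* pass an array of bs*bs blocks instead of PETSC_NULL to also set values */
.ve
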
2975    Level: developer
2976 
2977 .keywords: matrix, aij, compressed row, sparse, parallel
2978 
2979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2980 @*/
2981 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2982 {
2983   PetscErrorCode ierr;
2984 
2985   PetscFunctionBegin;
2986   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2987   PetscValidType(B,1);
2988   PetscValidLogicalCollectiveInt(B,bs,2);
2989   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2990   PetscFunctionReturn(0);
2991 }
2992 
2993 EXTERN_C_BEGIN
2994 #undef __FUNCT__
2995 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2996 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2997 {
2998   Mat_MPIBAIJ    *b;
2999   PetscErrorCode ierr;
3000   PetscInt       i;
3001   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3002 
3003   PetscFunctionBegin;
3004   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3005   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3006 
3007   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3008   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3009   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3010   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3011 
3012   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3013   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3014   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3015   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3016   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3017 
3018   if (d_nnz) {
3019     for (i=0; i<B->rmap->n/bs; i++) {
3020       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3021     }
3022   }
3023   if (o_nnz) {
3024     for (i=0; i<B->rmap->n/bs; i++) {
3025       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3026     }
3027   }
3028 
3029   b      = (Mat_MPIBAIJ*)B->data;
3030   b->bs2 = bs*bs;
3031   b->mbs = B->rmap->n/bs;
3032   b->nbs = B->cmap->n/bs;
3033   b->Mbs = B->rmap->N/bs;
3034   b->Nbs = B->cmap->N/bs;
3035 
3036   for (i=0; i<=b->size; i++) {
3037     b->rangebs[i] = B->rmap->range[i]/bs;
3038   }
3039   b->rstartbs = B->rmap->rstart/bs;
3040   b->rendbs   = B->rmap->rend/bs;
3041   b->cstartbs = B->cmap->rstart/bs;
3042   b->cendbs   = B->cmap->rend/bs;
3043 
3044   if (!B->preallocated) {
3045     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3046     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3047     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3048     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3049     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3050     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3051     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3052     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3053     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3054   }
3055 
3056   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3057   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3058   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3059   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3060   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3061   B->preallocated = PETSC_TRUE;
3062   PetscFunctionReturn(0);
3063 }
3064 EXTERN_C_END
3065 
3066 EXTERN_C_BEGIN
3067 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3068 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3069 EXTERN_C_END
3070 
3071 
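/*
    MatConvert_MPIBAIJ_MPIAdj - builds a block-level adjacency (MPIADJ) matrix from the diagonal
  and off-diagonal parts of the MPIBAIJ matrix; the diagonal entry of each block row is omitted
  from the adjacency structure.
*/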
3072 EXTERN_C_BEGIN
3073 #undef __FUNCT__
3074 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3075 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3076 {
3077   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3078   PetscErrorCode ierr;
3079   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3080   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3081   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3082 
3083   PetscFunctionBegin;
3084   ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3085   ii[0] = 0;
3086   CHKMEMQ;
3087   for (i=0; i<M; i++) {
3088     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3089     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3090     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3091     /* remove one from the count if this block row has an explicit diagonal entry */
3092     for (j=id[i]; j<id[i+1]; j++) {
3093       if (jd[j] == i) {ii[i+1]--;break;}
3094     }
3095     CHKMEMQ;
3096   }
3097   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3098   cnt  = 0;
3099   for (i=0; i<M; i++) {
3100     for (j=io[i]; j<io[i+1]; j++) {
3101       if (garray[jo[j]] > rstart) break;
3102       jj[cnt++] = garray[jo[j]];
3103       CHKMEMQ;
3104     }
3105     for (k=id[i]; k<id[i+1]; k++) {
3106       if (jd[k] != i) {
3107         jj[cnt++] = rstart + jd[k];
3108         CHKMEMQ;
3109       }
3110     }
3111     for (; j<io[i+1]; j++) {
3112       jj[cnt++] = garray[jo[j]];
3113       CHKMEMQ;
3114     }
3115   }
3116   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3117   PetscFunctionReturn(0);
3118 }
3119 EXTERN_C_END
3120 
3121 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3122 EXTERN_C_BEGIN
3123 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3124 EXTERN_C_END
3125 
3126 EXTERN_C_BEGIN
3127 #undef __FUNCT__
3128 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3129 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3130 {
3131   PetscErrorCode ierr;
3132   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3133   Mat            B;
3134   Mat_MPIAIJ     *b;
3135 
3136   PetscFunctionBegin;
3137   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3138 
3139   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3140   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3141   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3142   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3143   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3144   b    = (Mat_MPIAIJ*) B->data;
3145 
3146   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3147   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3148   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3149   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3150   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3151   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3152   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3153   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3154   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3155   if (reuse == MAT_REUSE_MATRIX) {
3156     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3157   } else {
3158    *newmat = B;
3159   }
3160   PetscFunctionReturn(0);
3161 }
3162 EXTERN_C_END
3163 
3164 EXTERN_C_BEGIN
3165 #if defined(PETSC_HAVE_MUMPS)
3166 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3167 #endif
3168 EXTERN_C_END
3169 
3170 /*MC
3171    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3172 
3173    Options Database Keys:
3174 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3175 . -mat_block_size <bs> - set the blocksize used to store the matrix
3176 - -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix; <fact> is used to compute the hash table size
3177 
3178   Level: beginner
3179 
3180 .seealso: MatCreateBAIJ()
3181 M*/
3182 
3183 EXTERN_C_BEGIN
3184 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3185 EXTERN_C_END
3186 
3187 EXTERN_C_BEGIN
3188 #undef __FUNCT__
3189 #define __FUNCT__ "MatCreate_MPIBAIJ"
3190 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3191 {
3192   Mat_MPIBAIJ    *b;
3193   PetscErrorCode ierr;
3194   PetscBool      flg;
3195 
3196   PetscFunctionBegin;
3197   ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3198   B->data = (void*)b;
3199 
3200   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3201   B->assembled = PETSC_FALSE;
3202 
3203   B->insertmode = NOT_SET_VALUES;
3204   ierr          = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3205   ierr          = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3206 
3207   /* build local table of row and column ownerships */
3208   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3209 
3210   /* build cache for off array entries formed */
3211   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3212 
3213   b->donotstash  = PETSC_FALSE;
3214   b->colmap      = PETSC_NULL;
3215   b->garray      = PETSC_NULL;
3216   b->roworiented = PETSC_TRUE;
3217 
3218   /* stuff used in block assembly */
3219   b->barray = 0;
3220 
3221   /* stuff used for matrix vector multiply */
3222   b->lvec  = 0;
3223   b->Mvctx = 0;
3224 
3225   /* stuff for MatGetRow() */
3226   b->rowindices   = 0;
3227   b->rowvalues    = 0;
3228   b->getrowactive = PETSC_FALSE;
3229 
3230   /* hash table stuff */
3231   b->ht           = 0;
3232   b->hd           = 0;
3233   b->ht_size      = 0;
3234   b->ht_flag      = PETSC_FALSE;
3235   b->ht_fact      = 0;
3236   b->ht_total_ct  = 0;
3237   b->ht_insert_ct = 0;
3238 
3239   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3240   b->ijonly = PETSC_FALSE;
3241 
3242   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3243   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3244   if (flg) {
3245     PetscReal fact = 1.39;
3246     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3247     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3248     if (fact <= 1.0) fact = 1.39;
3249     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3250     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3251   }
3252   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3253 
3254 #if defined(PETSC_HAVE_MUMPS)
3255   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3256 #endif
3257   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3258                                            "MatConvert_MPIBAIJ_MPIAdj",
3259                                            MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3260   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3261                                            "MatConvert_MPIBAIJ_MPIAIJ",
3262                                            MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3263   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3264                                            "MatConvert_MPIBAIJ_MPISBAIJ",
3265                                            MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3266   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3267                                            "MatStoreValues_MPIBAIJ",
3268                                            MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3269   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3270                                            "MatRetrieveValues_MPIBAIJ",
3271                                            MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3272   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3273                                            "MatGetDiagonalBlock_MPIBAIJ",
3274                                            MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3275   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3276                                            "MatMPIBAIJSetPreallocation_MPIBAIJ",
3277                                            MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3278   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3279                                            "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3280                                            MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3281   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3282                                            "MatDiagonalScaleLocal_MPIBAIJ",
3283                                            MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3284   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3285                                            "MatSetHashTableFactor_MPIBAIJ",
3286                                            MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3287   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3288                                            "MatConvert_MPIBAIJ_MPIBSTRM",
3289                                            MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3290   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3291   PetscFunctionReturn(0);
3292 }
3293 EXTERN_C_END
3294 
3295 /*MC
3296    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3297 
3298    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3299    and MATMPIBAIJ otherwise.
3300 
3301    Options Database Keys:
3302 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3303 
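   Example usage (a minimal sketch; comm, m, n, M and N are placeholders supplied by the caller):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);       /* resolves to MATSEQBAIJ or MATMPIBAIJ depending on the communicator size */
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);        /* honors -mat_type baij and related options */
.ve
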
3304   Level: beginner
3305 
3306 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3307 M*/
3308 
3309 #undef __FUNCT__
3310 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3311 /*@C
3312    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3313    (block compressed row).  For good matrix assembly performance
3314    the user should preallocate the matrix storage by setting the parameters
3315    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3316    performance can be increased by more than a factor of 50.
3317 
3318    Collective on Mat
3319 
3320    Input Parameters:
3321 +  A - the matrix
3322 .  bs   - size of block
3323 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3324            submatrix  (same for all local rows)
3325 .  d_nnz - array containing the number of block nonzeros in the various block rows
3326            of the diagonal portion of the local submatrix (possibly different for each block
3327            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3328            set it even if it is zero.
3329 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3330            submatrix (same for all local rows).
3331 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3332            off-diagonal portion of the local submatrix (possibly different for
3333            each block row) or PETSC_NULL.
3334 
3335    If the *_nnz parameter is given then the *_nz parameter is ignored
3336 
3337    Options Database Keys:
3338 +   -mat_block_size - size of the blocks to use
3339 -   -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix; <fact> is used to compute the hash table size
3340 
3341    Notes:
3342    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3343    then it must be used on all processors that share the object for that argument.
3344 
3345    Storage Information:
3346    For a square global matrix we define each processor's diagonal portion
3347    to be its local rows and the corresponding columns (a square submatrix);
3348    each processor's off-diagonal portion encompasses the remainder of the
3349    local matrix (a rectangular submatrix).
3350 
3351    The user can specify preallocated storage for the diagonal part of
3352    the local submatrix with either d_nz or d_nnz (not both).  Set
3353    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3354    memory allocation.  Likewise, specify preallocated storage for the
3355    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3356 
3357    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3358    the figure below we depict these three local rows and all columns (0-11).
3359 
3360 .vb
3361            0 1 2 3 4 5 6 7 8 9 10 11
3362           -------------------
3363    row 3  |  o o o d d d o o o o o o
3364    row 4  |  o o o d d d o o o o o o
3365    row 5  |  o o o d d d o o o o o o
3366           -------------------
3367 .ve
3368 
3369    Thus, any entries in the d locations are stored in the d (diagonal)
3370    submatrix, and any entries in the o locations are stored in the
3371    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3372    stored simply in the MATSEQBAIJ format for compressed row storage.
3373 
3374    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3375    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3376    In general, for PDE problems in which most nonzeros are near the diagonal,
3377    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3378    or you will get TERRIBLE performance; see the users' manual chapter on
3379    matrices.
3380 
3381    You can call MatGetInfo() to get information on how effective the preallocation was,
3382    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3383    You can also run with the option -info and look for messages with the string
3384    malloc in them to see if additional memory allocation was needed.
3385 
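   Example (a minimal sketch, assuming bs = 2 and three local block rows with 3, 2 and 4
   diagonal-block nonzeros and 1, 0 and 2 off-diagonal-block nonzeros; A is assumed to already
   be a MATMPIBAIJ matrix of matching local size):
.vb
   PetscInt d_nnz[3] = {3,2,4};
   PetscInt o_nnz[3] = {1,0,2};
   ierr = MatMPIBAIJSetPreallocation(A,2,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
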
3386    Level: intermediate
3387 
3388 .keywords: matrix, block, aij, compressed row, sparse, parallel
3389 
3390 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3391 @*/
3392 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3393 {
3394   PetscErrorCode ierr;
3395 
3396   PetscFunctionBegin;
3397   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3398   PetscValidType(B,1);
3399   PetscValidLogicalCollectiveInt(B,bs,2);
3400   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3401   PetscFunctionReturn(0);
3402 }
3403 
3404 #undef __FUNCT__
3405 #define __FUNCT__ "MatCreateBAIJ"
3406 /*@C
3407    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3408    (block compressed row).  For good matrix assembly performance
3409    the user should preallocate the matrix storage by setting the parameters
3410    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3411    performance can be increased by more than a factor of 50.
3412 
3413    Collective on MPI_Comm
3414 
3415    Input Parameters:
3416 +  comm - MPI communicator
3417 .  bs   - size of block
3418 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3419            This value should be the same as the local size used in creating the
3420            y vector for the matrix-vector product y = Ax.
3421 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3422            This value should be the same as the local size used in creating the
3423            x vector for the matrix-vector product y = Ax.
3424 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3425 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3426 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3427            submatrix  (same for all local rows)
3428 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3429            of the diagonal portion of the local submatrix (possibly different for each block
3430            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3431            and set it even if it is zero.
3432 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3433            submatrix (same for all local rows).
3434 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3435            off-diagonal portion of the local submatrix (possibly different for
3436            each block row) or PETSC_NULL.
3437 
3438    Output Parameter:
3439 .  A - the matrix
3440 
3441    Options Database Keys:
3442 +   -mat_block_size - size of the blocks to use
3443 -   -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix; <fact> is used to compute the hash table size
3444 
3445    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3446    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3447    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3448 
3449    Notes:
3450    If the *_nnz parameter is given then the *_nz parameter is ignored
3451 
3452    A nonzero block is any block that has 1 or more nonzeros in it
3453 
3454    The user MUST specify either the local or global matrix dimensions
3455    (possibly both).
3456 
3457    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3458    then it must be used on all processors that share the object for that argument.
3459 
3460    Storage Information:
3461    For a square global matrix we define each processor's diagonal portion
3462    to be its local rows and the corresponding columns (a square submatrix);
3463    each processor's off-diagonal portion encompasses the remainder of the
3464    local matrix (a rectangular submatrix).
3465 
3466    The user can specify preallocated storage for the diagonal part of
3467    the local submatrix with either d_nz or d_nnz (not both).  Set
3468    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3469    memory allocation.  Likewise, specify preallocated storage for the
3470    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3471 
3472    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3473    the figure below we depict these three local rows and all columns (0-11).
3474 
3475 .vb
3476            0 1 2 3 4 5 6 7 8 9 10 11
3477           -------------------
3478    row 3  |  o o o d d d o o o o o o
3479    row 4  |  o o o d d d o o o o o o
3480    row 5  |  o o o d d d o o o o o o
3481           -------------------
3482 .ve
3483 
3484    Thus, any entries in the d locations are stored in the d (diagonal)
3485    submatrix, and any entries in the o locations are stored in the
3486    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3487    stored simply in the MATSEQBAIJ format for compressed row storage.
3488 
3489    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3490    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3491    In general, for PDE problems in which most nonzeros are near the diagonal,
3492    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3493    or you will get TERRIBLE performance; see the users' manual chapter on
3494    matrices.
3495 
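   Example (a minimal sketch; comm, bs, m, n, d_nz and o_nz are placeholders supplied by the caller):
.vb
   Mat A;
   ierr = MatCreateBAIJ(comm,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
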
3496    Level: intermediate
3497 
3498 .keywords: matrix, block, aij, compressed row, sparse, parallel
3499 
3500 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3501 @*/
3502 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3503 {
3504   PetscErrorCode ierr;
3505   PetscMPIInt    size;
3506 
3507   PetscFunctionBegin;
3508   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3509   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3510   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3511   if (size > 1) {
3512     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3513     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3514   } else {
3515     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3516     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3517   }
3518   PetscFunctionReturn(0);
3519 }
3520 
3521 #undef __FUNCT__
3522 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3523 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3524 {
3525   Mat            mat;
3526   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3527   PetscErrorCode ierr;
3528   PetscInt       len=0;
3529 
3530   PetscFunctionBegin;
3531   *newmat = 0;
3532   ierr    = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3533   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3534   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3535   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3536 
3537   mat->factortype   = matin->factortype;
3538   mat->preallocated = PETSC_TRUE;
3539   mat->assembled    = PETSC_TRUE;
3540   mat->insertmode   = NOT_SET_VALUES;
3541 
3542   a             = (Mat_MPIBAIJ*)mat->data;
3543   mat->rmap->bs = matin->rmap->bs;
3544   a->bs2        = oldmat->bs2;
3545   a->mbs        = oldmat->mbs;
3546   a->nbs        = oldmat->nbs;
3547   a->Mbs        = oldmat->Mbs;
3548   a->Nbs        = oldmat->Nbs;
3549 
3550   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3551   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3552 
3553   a->size         = oldmat->size;
3554   a->rank         = oldmat->rank;
3555   a->donotstash   = oldmat->donotstash;
3556   a->roworiented  = oldmat->roworiented;
3557   a->rowindices   = 0;
3558   a->rowvalues    = 0;
3559   a->getrowactive = PETSC_FALSE;
3560   a->barray       = 0;
3561   a->rstartbs     = oldmat->rstartbs;
3562   a->rendbs       = oldmat->rendbs;
3563   a->cstartbs     = oldmat->cstartbs;
3564   a->cendbs       = oldmat->cendbs;
3565 
3566   /* hash table stuff */
3567   a->ht           = 0;
3568   a->hd           = 0;
3569   a->ht_size      = 0;
3570   a->ht_flag      = oldmat->ht_flag;
3571   a->ht_fact      = oldmat->ht_fact;
3572   a->ht_total_ct  = 0;
3573   a->ht_insert_ct = 0;
3574 
3575   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3576   if (oldmat->colmap) {
3577 #if defined(PETSC_USE_CTABLE)
3578     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3579 #else
3580     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3581     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3582     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3583 #endif
3584   } else a->colmap = 0;
3585 
3586   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3587     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3588     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3589     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3590   } else a->garray = 0;
3591 
3592   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3593   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3594   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3595   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3596   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3597 
3598   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3599   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3600   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3601   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3602   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3603   *newmat = mat;
3604   PetscFunctionReturn(0);
3605 }
3606 
3607 #undef __FUNCT__
3608 #define __FUNCT__ "MatLoad_MPIBAIJ"
3609 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3610 {
3611   PetscErrorCode ierr;
3612   int            fd;
3613   PetscInt       i,nz,j,rstart,rend;
3614   PetscScalar    *vals,*buf;
3615   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3616   MPI_Status     status;
3617   PetscMPIInt    rank,size,maxnz;
3618   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3619   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3620   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3621   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3622   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3623   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3624 
3625   PetscFunctionBegin;
3626   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3627   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3628   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3629 
3630   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3631   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3632   if (!rank) {
3633     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3634     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3635     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3636   }
3637 
3638   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3639 
3640   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3641   M    = header[1]; N = header[2];
3642 
3643   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3644   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3645   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3646 
3647   /* If global sizes are set, check if they are consistent with that given in the file */
3648   if (sizesset) {
3649     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3650   }
3651   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3652   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3653 
3654   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3655 
3656   /*
3657      This code adds extra rows to make sure the number of rows is
3658      divisible by the blocksize
3659   */
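  /* Example with hypothetical sizes: M=10 and bs=4 give Mbs=2 and extra_rows=4-10+8=2,
     so two padding rows are added and Mbs becomes 3 */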
3660   Mbs        = M/bs;
3661   extra_rows = bs - M + bs*Mbs;
3662   if (extra_rows == bs) extra_rows = 0;
3663   else                  Mbs++;
3664   if (extra_rows && !rank) {
3665     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3666   }
3667 
3668   /* determine ownership of all rows */
3669   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3670     mbs = Mbs/size + ((Mbs % size) > rank);
3671     m   = mbs*bs;
3672   } else { /* User set */
3673     m   = newmat->rmap->n;
3674     mbs = m/bs;
3675   }
3676   ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3677   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3678 
3679   /* process 0 needs enough room for process with most rows */
3680   if (!rank) {
3681     mmax = rowners[1];
3682     for (i=2; i<=size; i++) {
3683       mmax = PetscMax(mmax,rowners[i]);
3684     }
3685     mmax*=bs;
3686   } else mmax = m;
3687 
3688   rowners[0] = 0;
3689   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3690   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3691   rstart = rowners[rank];
3692   rend   = rowners[rank+1];
3693 
3694   /* distribute row lengths to all processors */
3695   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3696   if (!rank) {
3697     mend = m;
3698     if (size == 1) mend = mend - extra_rows;
3699     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3700     for (j=mend; j<m; j++) locrowlens[j] = 1;
3701     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3702     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3703     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3704     for (j=0; j<m; j++) {
3705       procsnz[0] += locrowlens[j];
3706     }
3707     for (i=1; i<size; i++) {
3708       mend = browners[i+1] - browners[i];
3709       if (i == size-1) mend = mend - extra_rows;
3710       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3711       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3712       /* calculate the number of nonzeros on each processor */
3713       for (j=0; j<browners[i+1]-browners[i]; j++) {
3714         procsnz[i] += rowlengths[j];
3715       }
3716       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3717     }
3718     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3719   } else {
3720     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3721   }
3722 
3723   if (!rank) {
3724     /* determine max buffer needed and allocate it */
3725     maxnz = procsnz[0];
3726     for (i=1; i<size; i++) {
3727       maxnz = PetscMax(maxnz,procsnz[i]);
3728     }
3729     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3730 
3731     /* read in my part of the matrix column indices  */
3732     nz     = procsnz[0];
3733     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3734     mycols = ibuf;
3735     if (size == 1) nz -= extra_rows;
3736     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3737     if (size == 1) {
3738       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3739     }
3740 
3741     /* read in each remaining process's column indices (except the last) and ship them off */
3742     for (i=1; i<size-1; i++) {
3743       nz   = procsnz[i];
3744       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3745       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3746     }
3747     /* read in the column indices for the last process */
3748     if (size != 1) {
3749       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3750       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3751       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3752       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3753     }
3754     ierr = PetscFree(cols);CHKERRQ(ierr);
3755   } else {
3756     /* determine buffer space needed for message */
3757     nz = 0;
3758     for (i=0; i<m; i++) {
3759       nz += locrowlens[i];
3760     }
3761     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3762     mycols = ibuf;
3763     /* receive message of column indices*/
3764     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3765     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3766     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Received number of column indices does not match the expected count");
3767   }
3768 
3769   /* loop over local rows, determining number of off diagonal entries */
3770   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3771   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3772   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3773   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3774   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3775   rowcount = 0; nzcount = 0;
3776   for (i=0; i<mbs; i++) {
3777     dcount  = 0;
3778     odcount = 0;
3779     for (j=0; j<bs; j++) {
3780       kmax = locrowlens[rowcount];
3781       for (k=0; k<kmax; k++) {
3782         tmp = mycols[nzcount++]/bs;
3783         if (!mask[tmp]) {
3784           mask[tmp] = 1;
3785           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3786           else masked1[dcount++] = tmp;
3787         }
3788       }
3789       rowcount++;
3790     }
3791 
3792     dlens[i]  = dcount;
3793     odlens[i] = odcount;
3794 
3795     /* zero out the mask elements we set */
3796     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3797     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3798   }
3799 
3800 
3801   if (!sizesset) {
3802     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3803   }
3804   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3805 
3806   if (!rank) {
3807     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3808     /* read in my part of the matrix numerical values  */
3809     nz     = procsnz[0];
3810     vals   = buf;
3811     mycols = ibuf;
3812     if (size == 1) nz -= extra_rows;
3813     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3814     if (size == 1) {
3815       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3816     }
3817 
3818     /* insert into matrix */
3819     jj = rstart*bs;
3820     for (i=0; i<m; i++) {
3821       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3822       mycols += locrowlens[i];
3823       vals   += locrowlens[i];
3824       jj++;
3825     }
3826     /* read in the values for the other processes (except the last one) and ship them out */
3827     for (i=1; i<size-1; i++) {
3828       nz   = procsnz[i];
3829       vals = buf;
3830       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3831       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3832     }
3833     /* the last proc */
3834     if (size != 1) {
3835       nz   = procsnz[i] - extra_rows;
3836       vals = buf;
3837       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3838       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3839       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3840     }
3841     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3842   } else {
3843     /* receive numeric values */
3844     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3845 
3846     /* receive message of values*/
3847     vals   = buf;
3848     mycols = ibuf;
3849     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3850 
3851     /* insert into matrix */
3852     jj = rstart*bs;
3853     for (i=0; i<m; i++) {
3854       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3855       mycols += locrowlens[i];
3856       vals   += locrowlens[i];
3857       jj++;
3858     }
3859   }
3860   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3861   ierr = PetscFree(buf);CHKERRQ(ierr);
3862   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3863   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3864   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3865   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3866   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3867   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3868   PetscFunctionReturn(0);
3869 }
3870 
3871 #undef __FUNCT__
3872 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3873 /*@
3874    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
3875 
3876    Input Parameters:
3877 +  mat  - the matrix
3878 -  fact - factor
3879 
3880    Not Collective, each process can use a different factor
3881 
3882    Level: advanced
3883 
3884   Notes:
3885    This can also be set by the command line option: -mat_use_hash_table <fact>
3886 
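   Example (a minimal sketch; A is assumed to be an existing MATMPIBAIJ matrix):
.vb
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.39);CHKERRQ(ierr);
.ve
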
3887 .keywords: matrix, hashtable, factor, HT
3888 
3889 .seealso: MatSetOption()
3890 @*/
3891 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3892 {
3893   PetscErrorCode ierr;
3894 
3895   PetscFunctionBegin;
3896   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3897   PetscFunctionReturn(0);
3898 }
3899 
3900 EXTERN_C_BEGIN
3901 #undef __FUNCT__
3902 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3903 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3904 {
3905   Mat_MPIBAIJ *baij;
3906 
3907   PetscFunctionBegin;
3908   baij          = (Mat_MPIBAIJ*)mat->data;
3909   baij->ht_fact = fact;
3910   PetscFunctionReturn(0);
3911 }
3912 EXTERN_C_END
3913 
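/*
    MatMPIBAIJGetSeqBAIJ - gives access to the two sequential BAIJ matrices that store the local
  part of an MPIBAIJ matrix: Ad holds the diagonal block, Ao the off-diagonal block, and colmap
  (garray) maps the compressed block columns of Ao to global block column numbers.  No copies
  are made.
*/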
3914 #undef __FUNCT__
3915 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3916 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3917 {
3918   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3919 
3920   PetscFunctionBegin;
3921   *Ad     = a->A;
3922   *Ao     = a->B;
3923   *colmap = a->garray;
3924   PetscFunctionReturn(0);
3925 }
3926 
3927 /*
3928     Special version for direct calls from Fortran (to eliminate two function call overheads)
3929 */
3930 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3931 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3932 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3933 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3934 #endif
3935 
3936 #undef __FUNCT__
3937 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3938 /*@C
3939   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3940 
3941   Collective on Mat
3942 
3943   Input Parameters:
3944 + mat - the matrix
3945 . min - number of input rows
3946 . im - input rows
3947 . nin - number of input columns
3948 . in - input columns
3949 . v - numerical values input
3950 - addvin - INSERT_VALUES or ADD_VALUES
3951 
3952   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3953 
3954   Level: advanced
3955 
3956 .seealso:   MatSetValuesBlocked()
3957 @*/
3958 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3959 {
3960   /* convert input arguments to C version */
3961   Mat        mat  = *matin;
3962   PetscInt   m    = *min, n = *nin;
3963   InsertMode addv = *addvin;
3964 
3965   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3966   const MatScalar *value;
3967   MatScalar       *barray     = baij->barray;
3968   PetscBool       roworiented = baij->roworiented;
3969   PetscErrorCode  ierr;
3970   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3971   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3972   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3973 
3974   PetscFunctionBegin;
3975   /* tasks normally handled by MatSetValuesBlocked() */
3976   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3977 #if defined(PETSC_USE_DEBUG)
3978   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3979   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3980 #endif
3981   if (mat->assembled) {
3982     mat->was_assembled = PETSC_TRUE;
3983     mat->assembled     = PETSC_FALSE;
3984   }
3985   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3986 
3987 
3988   if (!barray) {
3989     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3990     baij->barray = barray;
3991   }
3992 
3993   if (roworiented) stepval = (n-1)*bs;
3994   else stepval = (m-1)*bs;
3995 
3996   for (i=0; i<m; i++) {
3997     if (im[i] < 0) continue;
3998 #if defined(PETSC_USE_DEBUG)
3999     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4000 #endif
4001     if (im[i] >= rstart && im[i] < rend) {
4002       row = im[i] - rstart;
4003       for (j=0; j<n; j++) {
4004         /* If there is only one block column (row oriented) or one block row (column oriented), the block is already contiguous in v and no copy is needed */
4005         if ((roworiented) && (n == 1)) {
4006           barray = (MatScalar*)v + i*bs2;
4007         } else if ((!roworiented) && (m == 1)) {
4008           barray = (MatScalar*)v + j*bs2;
4009         } else { /* Here a copy is required */
4010           if (roworiented) {
4011             value = v + i*(stepval+bs)*bs + j*bs;
4012           } else {
4013             value = v + j*(stepval+bs)*bs + i*bs;
4014           }
4015           for (ii=0; ii<bs; ii++,value+=stepval) {
4016             for (jj=0; jj<bs; jj++) {
4017               *barray++ = *value++;
4018             }
4019           }
4020           barray -= bs2;
4021         }
4022 
4023         if (in[j] >= cstart && in[j] < cend) {
4024           col  = in[j] - cstart;
4025           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4026         } else if (in[j] < 0) continue;
4027 #if defined(PETSC_USE_DEBUG)
4028         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4029 #endif
4030         else {
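          /* in[j] falls outside this process's diagonal block, so the value belongs in the
             off-diagonal part baij->B; if the matrix was previously assembled, the global block
             column must first be translated to a local column of B through the colmap */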
4031           if (mat->was_assembled) {
4032             if (!baij->colmap) {
4033               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4034             }
4035 
4036 #if defined(PETSC_USE_DEBUG)
4037 #if defined(PETSC_USE_CTABLE)
4038             { PetscInt data;
4039               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4040               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4041             }
4042 #else
4043             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4044 #endif
4045 #endif
4046 #if defined(PETSC_USE_CTABLE)
4047             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4048             col  = (col - 1)/bs;
4049 #else
4050             col = (baij->colmap[in[j]] - 1)/bs;
4051 #endif
4052             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4053               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4054               col  =  in[j];
4055             }
4056           } else col = in[j];
4057           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4058         }
4059       }
4060     } else {
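      /* block row im[i] is not owned by this process; unless stashing is disabled, save the values
         so they can be sent to the owning process during matrix assembly */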
4061       if (!baij->donotstash) {
4062         if (roworiented) {
4063           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4064         } else {
4065           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4066         }
4067       }
4068     }
4069   }
4070 
4071   /* task normally handled by MatSetValuesBlocked() */
4072   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4073   PetscFunctionReturn(0);
4074 }
4075 
4076 #undef __FUNCT__
4077 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4078 /*@
4079      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4080          in standard CSR format.
4081 
4082    Collective on MPI_Comm
4083 
4084    Input Parameters:
4085 +  comm - MPI communicator
4086 .  bs - the block size, only a block size of 1 is supported
4087 .  m - number of local rows (Cannot be PETSC_DECIDE)
4088 .  n - This value should be the same as the local size used in creating the
4089        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4090        calculated if N is given). For square matrices n is almost always m.
4091 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4092 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4093 .   i - row indices
4094 .   j - column indices
4095 -   a - matrix values
4096 
4097    Output Parameter:
4098 .   mat - the matrix
4099 
4100    Level: intermediate
4101 
4102    Notes:
4103        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4104      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4105      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4106 
4107        The i and j indices are 0 based; the entries of i are offsets into the local j array.
4108 
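   Example usage (an illustrative sketch only; comm and ierr are assumed to be declared elsewhere).
   The arrays below describe two local rows containing two and one nonzeros respectively, in a
   matrix with three global columns and block size 1:
.vb
     PetscInt    i[] = {0,2,3};
     PetscInt    j[] = {0,1,1};
     PetscScalar a[] = {1.0,2.0,3.0};
     Mat         A;

     ierr = MatCreateMPIBAIJWithArrays(comm,1,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve
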
4109 .keywords: matrix, baij, compressed row, sparse, parallel
4110 
4111 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4112           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4113 @*/
4114 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4115 {
4116   PetscErrorCode ierr;
4117 
4118   PetscFunctionBegin;
4119   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4120   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4121   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4122   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4123   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4124   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4125   PetscFunctionReturn(0);
4126 }
4127