xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
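/*
   MatGetRowMaxAbs_MPIBAIJ computes, for each locally owned row, the entry of
   largest absolute value and (optionally, via idx[]) its global column: the
   row maxima of the diagonal block A are compared with those of the
   off-diagonal block B, and the winning local column is translated to a
   global column (through the column ownership start for A, through garray
   for B).
*/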
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non-scalable way since the
81    length of colmap equals the number of global block columns.
82 */
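/*
   Illustrative sketch (not compiled): with bs = 2 and garray = {3,7} the
   non-CTABLE branch stores 1-based offsets, so that 0 means "block column not
   present in B":

       colmap[3] = 0*bs + 1 = 1;   colmap[7] = 1*bs + 1 = 3;

   MatSetValues_MPIBAIJ() then recovers the local point column of B as

       col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;

   where a value of -1 before adding the point offset signals that the block
   column has not been allocated yet.
*/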
83 #undef __FUNCT__
84 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
85 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++) {
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
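/*
   The two macros below inline the inner kernel of MatSetValues_SeqBAIJ() for
   the diagonal (A) and off-diagonal (B) blocks.  For a point entry (row,col)
   they locate block column col/bs inside block row row/bs with a short
   bisection followed by a linear scan; if the block exists, the point entry at
   column-major offset bs*cidx + ridx within the bs x bs block is added to or
   overwritten.  Otherwise the later blocks of the row are shifted up by one
   and a new zeroed block is inserted, unless nonew == 1 (silently ignore the
   entry) or nonew == -1 (raise an error for a new nonzero).
*/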
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
184 
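/*
   MatSetValues_MPIBAIJ() dispatches each point entry: entries whose row and
   column both lie in this process's diagonal block go into baij->A, locally
   owned rows with off-process columns go into baij->B (the global column is
   translated through colmap, triggering a disassembly if the block column is
   not yet known), and rows owned by other processes are placed in the stash
   to be communicated during assembly.
*/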
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some variables required by the macros */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig) {
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j];
226           else             value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!baij->colmap) {
236               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined (PETSC_USE_CTABLE)
239             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
240             col  = col - 1;
241 #else
242             col = baij->colmap[in[j]/bs] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
245               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
246               col =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
248               B = baij->B;
249               b = (Mat_SeqBAIJ*)(B)->data;
250               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
251               ba=b->a;
252             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
253             else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j];
256           else             value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         mat->assembled = PETSC_FALSE;
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray=baij->barray;
283   PetscBool         roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) {
296     stepval = (n-1)*bs;
297   } else {
298     stepval = (m-1)*bs;
299   }
300   for (i=0; i<m; i++) {
301     if (im[i] < 0) continue;
302 #if defined(PETSC_USE_DEBUG)
303     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
304 #endif
305     if (im[i] >= rstart && im[i] < rend) {
306       row = im[i] - rstart;
307       for (j=0; j<n; j++) {
308         /* If there is only one block column (n == 1, row-oriented) or one block row (m == 1, column-oriented) the block in v is already contiguous and no copy is required */
309         if ((roworiented) && (n == 1)) {
310           barray = (MatScalar*)v + i*bs2;
311         } else if ((!roworiented) && (m == 1)) {
312           barray = (MatScalar*)v + j*bs2;
313         } else { /* Here a copy is required */
314           if (roworiented) {
315             value = v + (i*(stepval+bs) + j)*bs;
316           } else {
317             value = v + (j*(stepval+bs) + i)*bs;
318           }
319           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
320             for (jj=0; jj<bs; jj++) {
321               barray[jj]  = value[jj];
322             }
323             barray += bs;
324           }
325           barray -= bs2;
326         }
327 
328         if (in[j] >= cstart && in[j] < cend) {
329           col  = in[j] - cstart;
330           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
331         }
332         else if (in[j] < 0) continue;
333 #if defined(PETSC_USE_DEBUG)
334         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
335 #endif
336         else {
337           if (mat->was_assembled) {
338             if (!baij->colmap) {
339               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
340             }
341 
342 #if defined(PETSC_USE_DEBUG)
343 #if defined (PETSC_USE_CTABLE)
344             { PetscInt data;
345               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
346               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
347             }
348 #else
349             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
350 #endif
351 #endif
352 #if defined (PETSC_USE_CTABLE)
353             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
354             col  = (col - 1)/bs;
355 #else
356             col = (baij->colmap[in[j]] - 1)/bs;
357 #endif
358             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
359               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
360               col =  in[j];
361             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
362           } else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
379 
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
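/*
   HASH() is the classical multiplicative (Fibonacci) hash: HASH_KEY is the
   fractional part of the golden ratio, so the fractional part of key*HASH_KEY
   is well spread over [0,1), and scaling it by the table size gives a slot in
   [0,size).  For example (illustrative only), size = 11 and key = 25 give
   tmp = 15.4508..., and the slot is (PetscInt)(11*0.4508...) = 4.
*/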
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
389   PetscBool      roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408       row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j];
413         else             value = v[i+j*m];
414         /* Look up in the hash table */
415         key = (row/bs)*Nbs+(col/bs)+1;
416         h1  = HASH(size,key,tmp);
417 
418 
419         idx = h1;
420 #if defined(PETSC_USE_DEBUG)
421         insert_ct++;
422         total_ct++;
423         if (HT[idx] != key) {
424           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
425           if (idx == size) {
426             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
427             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428           }
429         }
430 #else
431         if (HT[idx] != key) {
432           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
433           if (idx == size) {
434             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
435             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453 #if defined(PETSC_USE_DEBUG)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463 {
464   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465   PetscBool         roworiented = baij->roworiented;
466   PetscErrorCode    ierr;
467   PetscInt          i,j,ii,jj,row,col;
468   PetscInt          rstart=baij->rstartbs;
469   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471   PetscReal         tmp;
472   MatScalar         **HD = baij->hd,*baij_a;
473   const PetscScalar *v_t,*value;
474 #if defined(PETSC_USE_DEBUG)
475   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476 #endif
477 
478   PetscFunctionBegin;
479   if (roworiented) {
480     stepval = (n-1)*bs;
481   } else {
482     stepval = (m-1)*bs;
483   }
484   for (i=0; i<m; i++) {
485 #if defined(PETSC_USE_DEBUG)
486     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
487     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
488 #endif
489     row   = im[i];
490     v_t   = v + i*nbs2;
491     if (row >= rstart && row < rend) {
492       for (j=0; j<n; j++) {
493         col = in[j];
494 
495         /* Look up into the Hash Table */
496         key = row*Nbs+col+1;
497         h1  = HASH(size,key,tmp);
498 
499         idx = h1;
500 #if defined(PETSC_USE_DEBUG)
501         total_ct++;
502         insert_ct++;
503         if (HT[idx] != key) {
504           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
505           if (idx == size) {
506             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
507             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
508           }
509         }
510 #else
511         if (HT[idx] != key) {
512           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
513           if (idx == size) {
514             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
515             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
516           }
517         }
518 #endif
519         baij_a = HD[idx];
520         if (roworiented) {
521           /*value = v + i*(stepval+bs)*bs + j*bs;*/
522           /* value = v + (i*(stepval+bs)+j)*bs; */
523           value = v_t;
524           v_t  += bs;
525           if (addv == ADD_VALUES) {
526             for (ii=0; ii<bs; ii++,value+=stepval) {
527               for (jj=ii; jj<bs2; jj+=bs) {
528                 baij_a[jj]  += *value++;
529               }
530             }
531           } else {
532             for (ii=0; ii<bs; ii++,value+=stepval) {
533               for (jj=ii; jj<bs2; jj+=bs) {
534                 baij_a[jj]  = *value++;
535               }
536             }
537           }
538         } else {
539           value = v + j*(stepval+bs)*bs + i*bs;
540           if (addv == ADD_VALUES) {
541             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
542               for (jj=0; jj<bs; jj++) {
543                 baij_a[jj]  += *value++;
544               }
545             }
546           } else {
547             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
548               for (jj=0; jj<bs; jj++) {
549                 baij_a[jj]  = *value++;
550               }
551             }
552           }
553         }
554       }
555     } else {
556       if (!baij->donotstash) {
557         if (roworiented) {
558           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
559         } else {
560           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
561         }
562       }
563     }
564   }
565 #if defined(PETSC_USE_DEBUG)
566   baij->ht_total_ct = total_ct;
567   baij->ht_insert_ct = insert_ct;
568 #endif
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatGetValues_MPIBAIJ"
574 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
575 {
576   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
577   PetscErrorCode ierr;
578   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
579   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
580 
581   PetscFunctionBegin;
582   for (i=0; i<m; i++) {
583     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
584     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
585     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
586       row = idxm[i] - bsrstart;
587       for (j=0; j<n; j++) {
588         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
589         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
590         if (idxn[j] >= bscstart && idxn[j] < bscend) {
591           col = idxn[j] - bscstart;
592           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
593         } else {
594           if (!baij->colmap) {
595             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
596           }
597 #if defined (PETSC_USE_CTABLE)
598           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
599           data --;
600 #else
601           data = baij->colmap[idxn[j]/bs]-1;
602 #endif
603           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
604           else {
605             col  = data + idxn[j]%bs;
606             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
607           }
608         }
609       }
610     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
611   }
612   PetscFunctionReturn(0);
613 }
614 
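/*
   MatNorm_MPIBAIJ: the Frobenius norm sums |a_ij|^2 over the local nonzeros of
   A and B and reduces with MPI_SUM before taking the square root; the 1-norm
   accumulates per-column sums of absolute values (off-diagonal columns are
   mapped through garray) and reduces with MPI_SUM before taking the maximum;
   the infinity norm forms per-row sums within each block row and reduces the
   local maximum with MPI_MAX.  On a single process the computation is
   delegated to MatNorm() on the diagonal block.
*/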
615 #undef __FUNCT__
616 #define __FUNCT__ "MatNorm_MPIBAIJ"
617 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
618 {
619   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
620   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
621   PetscErrorCode ierr;
622   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
623   PetscReal      sum = 0.0;
624   MatScalar      *v;
625 
626   PetscFunctionBegin;
627   if (baij->size == 1) {
628     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
629   } else {
630     if (type == NORM_FROBENIUS) {
631       v = amat->a;
632       nz = amat->nz*bs2;
633       for (i=0; i<nz; i++) {
634         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
635       }
636       v = bmat->a;
637       nz = bmat->nz*bs2;
638       for (i=0; i<nz; i++) {
639         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
640       }
641       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
642       *nrm = PetscSqrtReal(*nrm);
643     } else if (type == NORM_1) { /* max column sum */
644       PetscReal *tmp,*tmp2;
645       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
646       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
647       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
648       v = amat->a; jj = amat->j;
649       for (i=0; i<amat->nz; i++) {
650         for (j=0; j<bs; j++) {
651           col = bs*(cstart + *jj) + j; /* column index */
652           for (row=0; row<bs; row++) {
653             tmp[col] += PetscAbsScalar(*v);  v++;
654           }
655         }
656         jj++;
657       }
658       v = bmat->a; jj = bmat->j;
659       for (i=0; i<bmat->nz; i++) {
660         for (j=0; j<bs; j++) {
661           col = bs*garray[*jj] + j;
662           for (row=0; row<bs; row++) {
663             tmp[col] += PetscAbsScalar(*v); v++;
664           }
665         }
666         jj++;
667       }
668       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
669       *nrm = 0.0;
670       for (j=0; j<mat->cmap->N; j++) {
671         if (tmp2[j] > *nrm) *nrm = tmp2[j];
672       }
673       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
674     } else if (type == NORM_INFINITY) { /* max row sum */
675       PetscReal *sums;
676       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
677       sum = 0.0;
678       for (j=0; j<amat->mbs; j++) {
679         for (row=0; row<bs; row++) sums[row] = 0.0;
680         v = amat->a + bs2*amat->i[j];
681         nz = amat->i[j+1]-amat->i[j];
682         for (i=0; i<nz; i++) {
683           for (col=0; col<bs; col++) {
684             for (row=0; row<bs; row++) {
685               sums[row] += PetscAbsScalar(*v); v++;
686             }
687           }
688         }
689         v = bmat->a + bs2*bmat->i[j];
690         nz = bmat->i[j+1]-bmat->i[j];
691         for (i=0; i<nz; i++) {
692           for (col=0; col<bs; col++) {
693             for (row=0; row<bs; row++) {
694               sums[row] += PetscAbsScalar(*v); v++;
695             }
696           }
697         }
698         for (row=0; row<bs; row++) {
699           if (sums[row] > sum) sum = sums[row];
700         }
701       }
702       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
703       ierr = PetscFree(sums);CHKERRQ(ierr);
704     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
705   }
706   PetscFunctionReturn(0);
707 }
708 
709 /*
710   Creates the hash table and fills it with pointers into the blocks of A and B.
711   This table is created only once.
712   If new entries need to be added to the matrix
713   then the hash table has to be destroyed and
714   recreated.
715 */
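/*
   The table uses open addressing with linear probing: every existing bs x bs
   block of A and B is hashed on its global key = (block row)*Nbs + (block
   column) + 1, and the first empty slot at or after HASH(ht_size,key,tmp)
   (wrapping around) receives the key in HT[] and a pointer to the block's
   values in HD[].  MatSetValues_MPIBAIJ_HT() repeats the same probe sequence
   to locate a block.
*/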
716 #undef __FUNCT__
717 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
718 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
719 {
720   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
721   Mat            A = baij->A,B=baij->B;
722   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
723   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
724   PetscErrorCode ierr;
725   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
726   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
727   PetscInt       *HT,key;
728   MatScalar      **HD;
729   PetscReal      tmp;
730 #if defined(PETSC_USE_INFO)
731   PetscInt       ct=0,max=0;
732 #endif
733 
734   PetscFunctionBegin;
735   if (baij->ht) PetscFunctionReturn(0);
736 
737   baij->ht_size = (PetscInt)(factor*nz);
738   ht_size       = baij->ht_size;
739 
740   /* Allocate Memory for Hash Table */
741   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
742   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
743   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
744   HD   = baij->hd;
745   HT   = baij->ht;
746 
747   /* Loop Over A */
748   for (i=0; i<a->mbs; i++) {
749     for (j=ai[i]; j<ai[i+1]; j++) {
750       row = i+rstart;
751       col = aj[j]+cstart;
752 
753       key = row*Nbs + col + 1;
754       h1  = HASH(ht_size,key,tmp);
755       for (k=0; k<ht_size; k++) {
756         if (!HT[(h1+k)%ht_size]) {
757           HT[(h1+k)%ht_size] = key;
758           HD[(h1+k)%ht_size] = a->a + j*bs2;
759           break;
760 #if defined(PETSC_USE_INFO)
761         } else {
762           ct++;
763 #endif
764         }
765       }
766 #if defined(PETSC_USE_INFO)
767       if (k> max) max = k;
768 #endif
769     }
770   }
771   /* Loop Over B */
772   for (i=0; i<b->mbs; i++) {
773     for (j=bi[i]; j<bi[i+1]; j++) {
774       row = i+rstart;
775       col = garray[bj[j]];
776       key = row*Nbs + col + 1;
777       h1  = HASH(ht_size,key,tmp);
778       for (k=0; k<ht_size; k++) {
779         if (!HT[(h1+k)%ht_size]) {
780           HT[(h1+k)%ht_size] = key;
781           HD[(h1+k)%ht_size] = b->a + j*bs2;
782           break;
783 #if defined(PETSC_USE_INFO)
784         } else {
785           ct++;
786 #endif
787         }
788       }
789 #if defined(PETSC_USE_INFO)
790       if (k> max) max = k;
791 #endif
792     }
793   }
794 
795   /* Print Summary */
796 #if defined(PETSC_USE_INFO)
797   for (i=0,j=0; i<ht_size; i++) {
798     if (HT[i]) {j++;}
799   }
800   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
801 #endif
802   PetscFunctionReturn(0);
803 }
804 
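/*
   MatAssemblyBegin_MPIBAIJ checks (with an allreduce) that all processes use a
   consistent InsertMode and starts the communication of both the point stash
   and the block stash; the stashed off-process values are received and
   inserted during MatAssemblyEnd_MPIBAIJ().
*/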
805 #undef __FUNCT__
806 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
807 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
808 {
809   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
810   PetscErrorCode ierr;
811   PetscInt       nstash,reallocs;
812   InsertMode     addv;
813 
814   PetscFunctionBegin;
815   if (baij->donotstash || mat->nooffprocentries) {
816     PetscFunctionReturn(0);
817   }
818 
819   /* make sure all processors are either in INSERTMODE or ADDMODE */
820   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
821   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
822   mat->insertmode = addv; /* in case this processor had no cache */
823 
824   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
825   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
826   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
827   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
828   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
829   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
835 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
836 {
837   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
838   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
839   PetscErrorCode ierr;
840   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
841   PetscInt       *row,*col;
842   PetscBool      r1,r2,r3,other_disassembled;
843   MatScalar      *val;
844   InsertMode     addv = mat->insertmode;
845   PetscMPIInt    n;
846 
847   PetscFunctionBegin;
848   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
849   if (!baij->donotstash && !mat->nooffprocentries) {
850     while (1) {
851       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
852       if (!flg) break;
853 
854       for (i=0; i<n;) {
855         /* Now identify the consecutive vals belonging to the same row */
856         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
857         if (j < n) ncols = j-i;
858         else       ncols = n-i;
859         /* Now assemble all these values with a single function call */
860         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
861         i = j;
862       }
863     }
864     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
865     /* Now process the block-stash. Since the values are stashed column-oriented,
866        clear the roworiented flags before inserting, and restore the original
867        flags after MatSetValuesBlocked() */
868     r1 = baij->roworiented;
869     r2 = a->roworiented;
870     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
871     baij->roworiented = PETSC_FALSE;
872     a->roworiented    = PETSC_FALSE;
873     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
874     while (1) {
875       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
876       if (!flg) break;
877 
878       for (i=0; i<n;) {
879         /* Now identify the consecutive vals belonging to the same row */
880         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
881         if (j < n) ncols = j-i;
882         else       ncols = n-i;
883         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
884         i = j;
885       }
886     }
887     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
888     baij->roworiented = r1;
889     a->roworiented    = r2;
890     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
891   }
892 
893   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
894   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
895 
896   /* determine if any processor has disassembled, if so we must
897      also disassemble ourselves, so that we may reassemble. */
898   /*
899      if nonzero structure of submatrix B cannot change then we know that
900      no processor disassembled thus we can skip this stuff
901   */
902   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
903     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
904     if (mat->was_assembled && !other_disassembled) {
905       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
906     }
907   }
908 
909   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
910     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
911   }
912   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
913   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
914   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
915 
916 #if defined(PETSC_USE_INFO)
917   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
918     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
919     baij->ht_total_ct  = 0;
920     baij->ht_insert_ct = 0;
921   }
922 #endif
923   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
924     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
925     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
926     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
927   }
928 
929   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
930   baij->rowvalues = 0;
931   PetscFunctionReturn(0);
932 }
933 
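/*
   For ASCII, draw and socket viewers: on one process the diagonal block is
   viewed directly; in parallel the whole matrix is first copied into a
   temporary MPIBAIJ matrix stored entirely on process 0 and then viewed
   through the singleton viewer.  The ASCII info formats only print per-process
   summaries and return early.
*/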
934 #undef __FUNCT__
935 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
936 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
937 {
938   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
939   PetscErrorCode    ierr;
940   PetscMPIInt       size = baij->size,rank = baij->rank;
941   PetscInt          bs = mat->rmap->bs;
942   PetscBool         iascii,isdraw;
943   PetscViewer       sviewer;
944   PetscViewerFormat format;
945 
946   PetscFunctionBegin;
947   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
948   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
949   if (iascii) {
950     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
951     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
952       MatInfo info;
953       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
954       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
955       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
956       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
957              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
958       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
959       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
960       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
961       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
962       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
963       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
964       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
965       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
966       PetscFunctionReturn(0);
967     } else if (format == PETSC_VIEWER_ASCII_INFO) {
968       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
969       PetscFunctionReturn(0);
970     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
971       PetscFunctionReturn(0);
972     }
973   }
974 
975   if (isdraw) {
976     PetscDraw       draw;
977     PetscBool  isnull;
978     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
979     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
980   }
981 
982   if (size == 1) {
983     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
984     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
985   } else {
986     /* assemble the entire matrix onto first processor. */
987     Mat         A;
988     Mat_SeqBAIJ *Aloc;
989     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
990     MatScalar   *a;
991 
992     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
993     /* Perhaps this should be the type of mat? */
994     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
995     if (!rank) {
996       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
997     } else {
998       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
999     }
1000     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1001     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1002     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1003     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1004 
1005     /* copy over the A part */
1006     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1007     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1008     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1009 
1010     for (i=0; i<mbs; i++) {
1011       rvals[0] = bs*(baij->rstartbs + i);
1012       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1013       for (j=ai[i]; j<ai[i+1]; j++) {
1014         col = (baij->cstartbs+aj[j])*bs;
1015         for (k=0; k<bs; k++) {
1016           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1017           col++; a += bs;
1018         }
1019       }
1020     }
1021     /* copy over the B part */
1022     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1023     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1024     for (i=0; i<mbs; i++) {
1025       rvals[0] = bs*(baij->rstartbs + i);
1026       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1027       for (j=ai[i]; j<ai[i+1]; j++) {
1028         col = baij->garray[aj[j]]*bs;
1029         for (k=0; k<bs; k++) {
1030           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1031           col++; a += bs;
1032         }
1033       }
1034     }
1035     ierr = PetscFree(rvals);CHKERRQ(ierr);
1036     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1037     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038     /*
1039        Everyone has to call to draw the matrix since the graphics waits are
1040        synchronized across all processors that share the PetscDraw object
1041     */
1042     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1043     if (!rank) {
1044       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1045       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
1046       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1047       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1048     }
1049     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1050     ierr = MatDestroy(&A);CHKERRQ(ierr);
1051   }
1052   PetscFunctionReturn(0);
1053 }
1054 
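/*
   Writes the matrix in the standard PETSc binary format: a four-entry header
   (MAT_FILE_CLASSID, global rows, global columns, total number of nonzeros),
   followed by the point-row lengths, the column indices of every point row,
   and finally the numerical values.  Process 0 gathers the data from the other
   processes with the PetscViewerFlowControl routines and performs all writes.
*/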
1055 #undef __FUNCT__
1056 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1057 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1058 {
1059   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1060   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1061   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1062   PetscErrorCode ierr;
1063   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1064   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1065   int            fd;
1066   PetscScalar    *column_values;
1067   FILE           *file;
1068   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1069   PetscInt       message_count,flowcontrolcount;
1070 
1071   PetscFunctionBegin;
1072   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1074   nz   = bs2*(A->nz + B->nz);
1075   rlen = mat->rmap->n;
1076   if (!rank) {
1077     header[0] = MAT_FILE_CLASSID;
1078     header[1] = mat->rmap->N;
1079     header[2] = mat->cmap->N;
1080     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1081     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1082     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1083     /* get largest number of rows any processor has */
1084     range = mat->rmap->range;
1085     for (i=1; i<size; i++) {
1086       rlen = PetscMax(rlen,range[i+1] - range[i]);
1087     }
1088   } else {
1089     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1090   }
1091 
1092   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1093   /* compute lengths of each row  */
1094   for (i=0; i<a->mbs; i++) {
1095     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1096   }
1097   /* store the row lengths to the file */
1098   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1099   if (!rank) {
1100     MPI_Status status;
1101     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1102     rlen  = (range[1] - range[0])/bs;
1103     for (i=0; i<rlen; i++) {
1104       for (j=0; j<bs; j++) {
1105         row_lens[i*bs+j] = bs*crow_lens[i];
1106       }
1107     }
1108     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1109     for (i=1; i<size; i++) {
1110       rlen = (range[i+1] - range[i])/bs;
1111       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1112       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1113       for (k=0; k<rlen; k++) {
1114         for (j=0; j<bs; j++) {
1115           row_lens[k*bs+j] = bs*crow_lens[k];
1116         }
1117       }
1118       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1119     }
1120     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1121     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1122   } else {
1123     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1124     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1125     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1126   }
1127   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1128 
1129   /* load up the local column indices. Indices are stored for every point row, not just once per block row, since process 0
1130      cannot reconstruct the per-row indices from a block row alone. This does require more communication, but still no more than
1131      the communication needed for the nonzero values */
1132   nzmax = nz; /*  space a largest processor needs */
1133   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1134   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1135   cnt  = 0;
1136   for (i=0; i<a->mbs; i++) {
1137     pcnt = cnt;
1138     for (j=B->i[i]; j<B->i[i+1]; j++) {
1139       if ((col = garray[B->j[j]]) > cstart) break;
1140       for (l=0; l<bs; l++) {
1141         column_indices[cnt++] = bs*col+l;
1142       }
1143     }
1144     for (k=A->i[i]; k<A->i[i+1]; k++) {
1145       for (l=0; l<bs; l++) {
1146         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1147       }
1148     }
1149     for (; j<B->i[i+1]; j++) {
1150       for (l=0; l<bs; l++) {
1151         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1152       }
1153     }
1154     len = cnt - pcnt;
1155     for (k=1; k<bs; k++) {
1156       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1157       cnt += len;
1158     }
1159   }
1160   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1161 
1162   /* store the columns to the file */
1163   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1164   if (!rank) {
1165     MPI_Status status;
1166     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1167     for (i=1; i<size; i++) {
1168       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1169       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1170       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1171       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1172     }
1173     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1174   } else {
1175     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1176     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1177     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1178     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1179   }
1180   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1181 
1182   /* load up the numerical values */
1183   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1184   cnt = 0;
1185   for (i=0; i<a->mbs; i++) {
1186     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1187     for (j=B->i[i]; j<B->i[i+1]; j++) {
1188       if (garray[B->j[j]] > cstart) break;
1189       for (l=0; l<bs; l++) {
1190         for (ll=0; ll<bs; ll++) {
1191           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1192         }
1193       }
1194       cnt += bs;
1195     }
1196     for (k=A->i[i]; k<A->i[i+1]; k++) {
1197       for (l=0; l<bs; l++) {
1198         for (ll=0; ll<bs; ll++) {
1199           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1200         }
1201       }
1202       cnt += bs;
1203     }
1204     for (; j<B->i[i+1]; j++) {
1205       for (l=0; l<bs; l++) {
1206         for (ll=0; ll<bs; ll++) {
1207           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1208         }
1209       }
1210       cnt += bs;
1211     }
1212     cnt += (bs-1)*rlen;
1213   }
1214   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1215 
1216   /* store the column values to the file */
1217   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1218   if (!rank) {
1219     MPI_Status status;
1220     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1221     for (i=1; i<size; i++) {
1222       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1223       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1224       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1225       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1226     }
1227     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1228   } else {
1229     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1230     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1231     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(column_values);CHKERRQ(ierr);
1235 
1236   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1237   if (file) {
1238     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1239   }
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatView_MPIBAIJ"
1245 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1246 {
1247   PetscErrorCode ierr;
1248   PetscBool      iascii,isdraw,issocket,isbinary;
1249 
1250   PetscFunctionBegin;
1251   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1255   if (iascii || isdraw || issocket) {
1256     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1257   } else if (isbinary) {
1258     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1265 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1266 {
1267   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1268   PetscErrorCode ierr;
1269 
1270   PetscFunctionBegin;
1271 #if defined(PETSC_USE_LOG)
1272   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1273 #endif
1274   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1275   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1276   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1277   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1278 #if defined (PETSC_USE_CTABLE)
1279   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1280 #else
1281   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1282 #endif
1283   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1284   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1285   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1286   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1287   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1288   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1289   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1290   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1291 
1292   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatMult_MPIBAIJ"
1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1308 {
1309   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       nt;
1312 
1313   PetscFunctionBegin;
1314   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1315   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1316   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1317   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
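       /* Overlap communication with computation: start scattering the needed off-process
          entries of xx into lvec, apply the local diagonal block A while the scatter is in
          flight, then finish the scatter and add in the off-diagonal contribution B*lvec. */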
1318   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1320   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1328 {
1329   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1335   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1342 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1343 {
1344   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1345   PetscErrorCode ierr;
1346   PetscBool      merged;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
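       /* If the scatter is "merged", VecScatterBegin() already adds the remote contributions
          into yy, so the local product must be placed in yy before the scatter is started;
          otherwise the reverse scatter can be overlapped with the local multiply. */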
1350   /* do nondiagonal part */
1351   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1352   if (!merged) {
1353     /* send it on its way */
1354     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1355     /* do local part */
1356     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1357     /* receive remote parts: note this assumes the values are not actually */
1358     /* inserted in yy until the next line */
1359     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1360   } else {
1361     /* do local part */
1362     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1363     /* send it on its way */
1364     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1365     /* the values were actually received in the Begin(), but we still need to call End() as a no-op */
1366     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367   }
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374 {
1375   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   /* do nondiagonal part */
1380   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1381   /* send it on its way */
1382   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   /* do local part */
1384   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1385   /* receive remote parts: note this assumes the values are not actually */
1386   /* inserted in zz until the next line, which is true for this implementation */
1387   /* but may not always hold. */
1388   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 /*
1393   This only works correctly for square matrices where the subblock A->A is the
1394    diagonal block
1395 */
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrices where A->A is the diagonal block");
1405   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatScale_MPIBAIJ"
1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1412 {
1413   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1414   PetscErrorCode ierr;
1415 
1416   PetscFunctionBegin;
1417   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1418   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1425 {
1426   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1427   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1428   PetscErrorCode ierr;
1429   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1430   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1431   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1432 
1433   PetscFunctionBegin;
1434   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1435   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1436   mat->getrowactive = PETSC_TRUE;
1437 
1438   if (!mat->rowvalues && (idx || v)) {
1439     /*
1440         allocate enough space to hold information from the longest row.
1441     */
1442     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1443     PetscInt     max = 1,mbs = mat->mbs,tmp;
1444     for (i=0; i<mbs; i++) {
1445       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1446       if (max < tmp) { max = tmp; }
1447     }
1448     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1449   }
1450   lrow = row - brstart;
1451 
1452   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1453   if (!v)   {pvA = 0; pvB = 0;}
1454   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1455   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1456   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1457   nztot = nzA + nzB;
1458 
1459   cmap  = mat->garray;
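       /* Column indices returned by the off-diagonal part B are in its compressed local
          block numbering; cmap (= garray) maps a compressed block column back to its global
          block column, which is used below to produce sorted, globally numbered output. */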
1460   if (v  || idx) {
1461     if (nztot) {
1462       /* Sort by increasing column numbers, assuming A and B already sorted */
1463       PetscInt imark = -1;
1464       if (v) {
1465         *v = v_p = mat->rowvalues;
1466         for (i=0; i<nzB; i++) {
1467           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1468           else break;
1469         }
1470         imark = i;
1471         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1472         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1473       }
1474       if (idx) {
1475         *idx = idx_p = mat->rowindices;
1476         if (imark > -1) {
1477           for (i=0; i<imark; i++) {
1478             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479           }
1480         } else {
1481           for (i=0; i<nzB; i++) {
1482             if (cmap[cworkB[i]/bs] < cstart)
1483               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1484             else break;
1485           }
1486           imark = i;
1487         }
1488         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1489         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1490       }
1491     } else {
1492       if (idx) *idx = 0;
1493       if (v)   *v   = 0;
1494     }
1495   }
1496   *nz = nztot;
1497   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1498   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1504 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1505 {
1506   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1507 
1508   PetscFunctionBegin;
1509   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1510   baij->getrowactive = PETSC_FALSE;
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1516 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1517 {
1518   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1519   PetscErrorCode ierr;
1520 
1521   PetscFunctionBegin;
1522   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1523   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1529 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1530 {
1531   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1532   Mat            A = a->A,B = a->B;
1533   PetscErrorCode ierr;
1534   PetscReal      isend[5],irecv[5];
1535 
1536   PetscFunctionBegin;
1537   info->block_size     = (PetscReal)matin->rmap->bs;
1538   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1539   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1540   isend[3] = info->memory;  isend[4] = info->mallocs;
1541   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1542   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1543   isend[3] += info->memory;  isend[4] += info->mallocs;
1544   if (flag == MAT_LOCAL) {
1545     info->nz_used      = isend[0];
1546     info->nz_allocated = isend[1];
1547     info->nz_unneeded  = isend[2];
1548     info->memory       = isend[3];
1549     info->mallocs      = isend[4];
1550   } else if (flag == MAT_GLOBAL_MAX) {
1551     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1552     info->nz_used      = irecv[0];
1553     info->nz_allocated = irecv[1];
1554     info->nz_unneeded  = irecv[2];
1555     info->memory       = irecv[3];
1556     info->mallocs      = irecv[4];
1557   } else if (flag == MAT_GLOBAL_SUM) {
1558     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1559     info->nz_used      = irecv[0];
1560     info->nz_allocated = irecv[1];
1561     info->nz_unneeded  = irecv[2];
1562     info->memory       = irecv[3];
1563     info->mallocs      = irecv[4];
1564   } else SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1565   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1566   info->fill_ratio_needed = 0;
1567   info->factor_mallocs    = 0;
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1573 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1574 {
1575   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1576   PetscErrorCode ierr;
1577 
1578   PetscFunctionBegin;
1579   switch (op) {
1580   case MAT_NEW_NONZERO_LOCATIONS:
1581   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1582   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1583   case MAT_KEEP_NONZERO_PATTERN:
1584   case MAT_NEW_NONZERO_LOCATION_ERR:
1585     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1586     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1587     break;
1588   case MAT_ROW_ORIENTED:
1589     a->roworiented = flg;
1590     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1591     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1592     break;
1593   case MAT_NEW_DIAGONALS:
1594     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1595     break;
1596   case MAT_IGNORE_OFF_PROC_ENTRIES:
1597     a->donotstash = flg;
1598     break;
1599   case MAT_USE_HASH_TABLE:
1600     a->ht_flag = flg;
1601     break;
1602   case MAT_SYMMETRIC:
1603   case MAT_STRUCTURALLY_SYMMETRIC:
1604   case MAT_HERMITIAN:
1605   case MAT_SYMMETRY_ETERNAL:
1606     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1607     break;
1608   default:
1609     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1610   }
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1616 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1617 {
1618   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1619   Mat_SeqBAIJ    *Aloc;
1620   Mat            B;
1621   PetscErrorCode ierr;
1622   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1623   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1624   MatScalar      *a;
1625 
1626   PetscFunctionBegin;
1627   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1628   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1629     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1630     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1631     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1632     /* Do not know preallocation information, but must set block size */
1633     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1634   } else {
1635     B = *matout;
1636   }
1637 
1638   /* copy over the A part */
1639   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1640   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1641   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
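       /* rvals holds the bs global (point) row indices of the current block row of A; each
          stored block is re-inserted into B with its block-row and block-column exchanged,
          bs values at a time, so that B ends up holding the transpose. */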
1642 
1643   for (i=0; i<mbs; i++) {
1644     rvals[0] = bs*(baij->rstartbs + i);
1645     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1646     for (j=ai[i]; j<ai[i+1]; j++) {
1647       col = (baij->cstartbs+aj[j])*bs;
1648       for (k=0; k<bs; k++) {
1649         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1650         col++; a += bs;
1651       }
1652     }
1653   }
1654   /* copy over the B part */
1655   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1656   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1657   for (i=0; i<mbs; i++) {
1658     rvals[0] = bs*(baij->rstartbs + i);
1659     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1660     for (j=ai[i]; j<ai[i+1]; j++) {
1661       col = baij->garray[aj[j]]*bs;
1662       for (k=0; k<bs; k++) {
1663         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1664         col++; a += bs;
1665       }
1666     }
1667   }
1668   ierr = PetscFree(rvals);CHKERRQ(ierr);
1669   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671 
1672   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1673     *matout = B;
1674   } else {
1675     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1676   }
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1682 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1683 {
1684   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1685   Mat            a = baij->A,b = baij->B;
1686   PetscErrorCode ierr;
1687   PetscInt       s1,s2,s3;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1691   if (rr) {
1692     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1693     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1694     /* Overlap communication with computation. */
1695     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696   }
1697   if (ll) {
1698     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1699     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1700     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1701   }
1702   /* scale the diagonal block */
1703   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1704 
1705   if (rr) {
1706     /* Do a scatter end and then right scale the off-diagonal block */
1707     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1708     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1709   }
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1715 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1716 {
1717   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1718   PetscErrorCode    ierr;
1719   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1720   PetscInt          i,*owners = A->rmap->range;
1721   PetscInt          *nprocs,j,idx,nsends,row;
1722   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1723   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1724   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1725   MPI_Comm          comm = ((PetscObject)A)->comm;
1726   MPI_Request       *send_waits,*recv_waits;
1727   MPI_Status        recv_status,*send_status;
1728   const PetscScalar *xx;
1729   PetscScalar       *bb;
1730 #if defined(PETSC_DEBUG)
1731   PetscBool         found = PETSC_FALSE;
1732 #endif
1733 
1734   PetscFunctionBegin;
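       /* Each process determines which process owns each row it wants zeroed, ships those row
          indices to the owners with nonblocking sends/receives, and then zeroes the locally
          owned rows that any process requested. */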
1735   /*  first count number of contributors to each processor */
1736   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1737   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1738   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1739   j = 0;
1740   for (i=0; i<N; i++) {
1741     if (lastidx > (idx = rows[i])) j = 0;
1742     lastidx = idx;
1743     for (; j<size; j++) {
1744       if (idx >= owners[j] && idx < owners[j+1]) {
1745         nprocs[2*j]++;
1746         nprocs[2*j+1] = 1;
1747         owner[i] = j;
1748 #if defined(PETSC_DEBUG)
1749         found = PETSC_TRUE;
1750 #endif
1751         break;
1752       }
1753     }
1754 #if defined(PETSC_DEBUG)
1755     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1756     found = PETSC_FALSE;
1757 #endif
1758   }
1759   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1760 
1761   if (A->nooffproczerorows) {
1762     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1763     nrecvs = nsends;
1764     nmax   = N;
1765   } else {
1766     /* inform other processors of number of messages and max length*/
1767     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1768   }
1769 
1770   /* post receives:   */
1771   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1772   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1773   for (i=0; i<nrecvs; i++) {
1774     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1775   }
1776 
1777   /* do sends:
1778      1) starts[i] gives the starting index in svalues for stuff going to
1779      the ith processor
1780   */
1781   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1782   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1783   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1784   starts[0]  = 0;
1785   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1786   for (i=0; i<N; i++) {
1787     svalues[starts[owner[i]]++] = rows[i];
1788   }
1789 
1790   starts[0] = 0;
1791   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1792   count = 0;
1793   for (i=0; i<size; i++) {
1794     if (nprocs[2*i+1]) {
1795       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1796     }
1797   }
1798   ierr = PetscFree(starts);CHKERRQ(ierr);
1799 
1800   base = owners[rank];
1801 
1802   /*  wait on receives */
1803   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1804   count  = nrecvs;
1805   slen = 0;
1806   while (count) {
1807     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1808     /* unpack receives into our local space */
1809     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1810     source[imdex]  = recv_status.MPI_SOURCE;
1811     lens[imdex]    = n;
1812     slen          += n;
1813     count--;
1814   }
1815   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1816 
1817   /* move the data into the send scatter */
1818   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1819   count = 0;
1820   for (i=0; i<nrecvs; i++) {
1821     values = rvalues + i*nmax;
1822     for (j=0; j<lens[i]; j++) {
1823       lrows[count++] = values[j] - base;
1824     }
1825   }
1826   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1827   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1828   ierr = PetscFree(owner);CHKERRQ(ierr);
1829   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1830 
1831   /* fix right hand side if needed */
1832   if (x && b) {
1833     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1834     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1835     for (i=0; i<slen; i++) {
1836       bb[lrows[i]] = diag*xx[lrows[i]];
1837     }
1838     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1839     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1840   }
1841 
1842   /* actually zap the local rows */
1843   /*
1844         Zero the required rows. If the "diagonal block" of the matrix
1845      is square and the user wishes to set the diagonal we use separate
1846      code so that MatSetValues() is not called for each diagonal allocating
1847      new memory, thus calling lots of mallocs and slowing things down.
1848 
1849   */
1850   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1851   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1852   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1853     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1854   } else if (diag != 0.0) {
1855     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1856     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1857        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1858     for (i=0; i<slen; i++) {
1859       row  = lrows[i] + rstart_bs;
1860       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1861     }
1862     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1863     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1864   } else {
1865     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1866   }
1867 
1868   ierr = PetscFree(lrows);CHKERRQ(ierr);
1869 
1870   /* wait on sends */
1871   if (nsends) {
1872     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1873     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1874     ierr = PetscFree(send_status);CHKERRQ(ierr);
1875   }
1876   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1877   ierr = PetscFree(svalues);CHKERRQ(ierr);
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1883 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1884 {
1885   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1886   PetscErrorCode ierr;
1887 
1888   PetscFunctionBegin;
1889   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1894 
1895 #undef __FUNCT__
1896 #define __FUNCT__ "MatEqual_MPIBAIJ"
1897 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1898 {
1899   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1900   Mat            a,b,c,d;
1901   PetscBool      flg;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   a = matA->A; b = matA->B;
1906   c = matB->A; d = matB->B;
1907 
1908   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1909   if (flg) {
1910     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1911   }
1912   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatCopy_MPIBAIJ"
1918 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1919 {
1920   PetscErrorCode ierr;
1921   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1922   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1923 
1924   PetscFunctionBegin;
1925   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1926   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1927     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1928   } else {
1929     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1930     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1931   }
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1937 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1938 {
1939   PetscErrorCode ierr;
1940 
1941   PetscFunctionBegin;
1942   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1948 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1949 {
1950   PetscErrorCode ierr;
1951   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1952   PetscBLASInt   bnz,one=1;
1953   Mat_SeqBAIJ    *x,*y;
1954 
1955   PetscFunctionBegin;
1956   if (str == SAME_NONZERO_PATTERN) {
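         /* With identical nonzero patterns the stored value arrays of X and Y line up entry
            for entry, so Y += a*X reduces to a BLAS axpy on the diagonal (A) and
            off-diagonal (B) value arrays. */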
1957     PetscScalar alpha = a;
1958     x    = (Mat_SeqBAIJ *)xx->A->data;
1959     y    = (Mat_SeqBAIJ *)yy->A->data;
1960     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1961     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1962     x    = (Mat_SeqBAIJ *)xx->B->data;
1963     y    = (Mat_SeqBAIJ *)yy->B->data;
1964     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1965     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1966   } else {
1967     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1968   }
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1974 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1975 {
1976   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1977   PetscErrorCode ierr;
1978 
1979   PetscFunctionBegin;
1980   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1981   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 #undef __FUNCT__
1986 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1987 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1988 {
1989   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1990   PetscErrorCode ierr;
1991 
1992   PetscFunctionBegin;
1993   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1994   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2000 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2001 {
2002   PetscErrorCode ierr;
2003   IS             iscol_local;
2004   PetscInt       csize;
2005 
2006   PetscFunctionBegin;
2007   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2008   if (call == MAT_REUSE_MATRIX) {
2009     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2010     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2011   } else {
2012     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2013   }
2014   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2015   if (call == MAT_INITIAL_MATRIX) {
2016     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2017     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2018   }
2019   PetscFunctionReturn(0);
2020 }
2021 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2024 /*
2025   Not great since it makes two copies of the submatrix: first a local SeqBAIJ
2026   and then the end result obtained by concatenating the local matrices.
2027   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2028 */
2029 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2030 {
2031   PetscErrorCode ierr;
2032   PetscMPIInt    rank,size;
2033   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2034   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2035   Mat            M,Mreuse;
2036   MatScalar      *vwork,*aa;
2037   MPI_Comm       comm = ((PetscObject)mat)->comm;
2038   IS             isrow_new, iscol_new;
2039   PetscBool      idflag,allrows, allcols;
2040   Mat_SeqBAIJ    *aij;
2041 
2042   PetscFunctionBegin;
2043   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2044   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2045   /* The compression and expansion should be avoided. It does not flag
2046      errors and might change the indices, hence it is buggy */
2047   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2048   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2049 
2050   /* Check for special case: each processor gets entire matrix columns */
2051   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2052   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2053   if (idflag && ncol == mat->cmap->N) {
2054     allcols = PETSC_TRUE;
2055   } else {
2056     allcols = PETSC_FALSE;
2057   }
2058 
2059   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2060   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2061   if (idflag && nrow == mat->rmap->N) {
2062     allrows = PETSC_TRUE;
2063   } else {
2064     allrows = PETSC_FALSE;
2065   }
2066   if (call ==  MAT_REUSE_MATRIX) {
2067     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2068     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2069     ierr  = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2070   } else {
2071     ierr   = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2072   }
2073   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2074   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2075   /*
2076       m - number of local rows
2077       n - number of columns (same on all processors)
2078       rstart - first row in new global matrix generated
2079   */
2080   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2081   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2082   m    = m/bs;
2083   n    = n/bs;
2084 
2085   if (call == MAT_INITIAL_MATRIX) {
2086     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2087     ii  = aij->i;
2088     jj  = aij->j;
2089 
2090     /*
2091         Determine the number of non-zeros in the diagonal and off-diagonal
2092         portions of the matrix in order to do correct preallocation
2093     */
2094 
2095     /* first get start and end of "diagonal" columns */
2096     if (csize == PETSC_DECIDE) {
2097       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2098       if (mglobal == n*bs) { /* square matrix */
2099         nlocal = m;
2100       } else {
2101         nlocal = n/size + ((n % size) > rank);
2102       }
2103     } else {
2104       nlocal = csize/bs;
2105     }
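         /* MPI_Scan over the local column counts gives rend, one past this process's last
            "diagonal" block column; rstart = rend - nlocal is then its first diagonal column. */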
2106     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2107     rstart = rend - nlocal;
2108     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2109 
2110     /* next, compute all the lengths */
2111     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2112     olens = dlens + m;
2113     for (i=0; i<m; i++) {
2114       jend = ii[i+1] - ii[i];
2115       olen = 0;
2116       dlen = 0;
2117       for (j=0; j<jend; j++) {
2118         if (*jj < rstart || *jj >= rend) olen++;
2119         else dlen++;
2120         jj++;
2121       }
2122       olens[i] = olen;
2123       dlens[i] = dlen;
2124     }
2125     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2126     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2127     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2128     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2129     ierr = PetscFree(dlens);CHKERRQ(ierr);
2130   } else {
2131     PetscInt ml,nl;
2132 
2133     M = *newmat;
2134     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2135     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2136     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2137     /*
2138          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2139        rather than the slower MatSetValues().
2140     */
2141     M->was_assembled = PETSC_TRUE;
2142     M->assembled     = PETSC_FALSE;
2143   }
2144   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2145   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2146   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2147   ii  = aij->i;
2148   jj  = aij->j;
2149   aa  = aij->a;
2150   for (i=0; i<m; i++) {
2151     row   = rstart/bs + i;
2152     nz    = ii[i+1] - ii[i];
2153     cwork = jj;     jj += nz;
2154     vwork = aa;     aa += nz*bs*bs;
2155     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2156   }
2157 
2158   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160   *newmat = M;
2161 
2162   /* save submatrix used in processor for next request */
2163   if (call ==  MAT_INITIAL_MATRIX) {
2164     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2165     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2166   }
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 #undef __FUNCT__
2171 #define __FUNCT__ "MatPermute_MPIBAIJ"
2172 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2173 {
2174   MPI_Comm       comm,pcomm;
2175   PetscInt       first,rlocal_size,clocal_size,nrows;
2176   const PetscInt *rows;
2177   PetscMPIInt    size;
2178   IS             crowp,growp,irowp,lrowp,lcolp;
2179   PetscErrorCode ierr;
2180 
2181   PetscFunctionBegin;
2182   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2183   /* make a collective version of 'rowp' */
2184   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2185   if (pcomm==comm) {
2186     crowp = rowp;
2187   } else {
2188     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2189     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2190     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2191     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2192   }
2193   /* collect the global row permutation and invert it */
2194   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2195   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2196   if (pcomm!=comm) {
2197     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2198   }
2199   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2200   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2201   /* get the local target indices */
2202   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2203   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2204   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2205   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2206   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2207   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2208   /* the column permutation is so much easier;
2209      make a local version of 'colp' and invert it */
2210   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2211   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2212   if (size==1) {
2213     lcolp = colp;
2214   } else {
2215     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2216   }
2217   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2218   /* now we just get the submatrix */
2219   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2220   if (size>1) {
2221     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2222   }
2223   /* clean up */
2224   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 #undef __FUNCT__
2229 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2230 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2231 {
2232   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2233   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2234 
2235   PetscFunctionBegin;
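       /* The ghost (off-process) block columns are exactly the entries of garray; there are
          B->nbs of them, one per block column of the off-diagonal part. */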
2236   if (nghosts) { *nghosts = B->nbs;}
2237   if (ghosts) {*ghosts = baij->garray;}
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2245 /*
2246     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2247 */
2248 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2249 {
2250   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2251   PetscErrorCode        ierr;
2252   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2253   PetscInt              bs,i,n,nrows,j,k,m,ncols,col;
2254   const PetscInt        *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj;
2255   PetscInt              nis = iscoloring->n,nctot,*cols;
2256   PetscInt              *rowhit,M,cstart,cend,colb;
2257   PetscInt              *columnsforrow,l;
2258   IS                    *isa;
2259   PetscBool              done,flg;
2260   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2261   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2262 
2263   PetscFunctionBegin;
2264   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2265   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2266 
2267   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2268   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2269   M                = mat->rmap->n/bs;
2270   cstart           = mat->cmap->rstart/bs;
2271   cend             = mat->cmap->rend/bs;
2272   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2273   c->N             = mat->cmap->N/bs;
2274   c->m             = mat->rmap->n/bs;
2275   c->rstart        = mat->rmap->rstart/bs;
2276 
2277   c->ncolors       = nis;
2278   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2279   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2280   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2281   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2282   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2283   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2284 
2285   /* Allow access to data structures of local part of matrix */
2286   if (!baij->colmap) {
2287     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2288   }
2289   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2290   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2291 
2292   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2293   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2294 
2295   for (i=0; i<nis; i++) {
2296     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2297     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2298     c->ncolumns[i] = n;
2299     if (n) {
2300       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2301       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2302       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2303     } else {
2304       c->columns[i]  = 0;
2305     }
2306 
2307     if (ctype == IS_COLORING_GLOBAL) {
2308       /* Determine the total (parallel) number of columns of this color */
2309       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2310       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2311 
2312       ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
2313       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2314       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2315       if (!nctot) {
2316         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2317       }
2318 
2319       disp[0] = 0;
2320       for (j=1; j<size; j++) {
2321         disp[j] = disp[j-1] + ncolsonproc[j-1];
2322       }
2323 
2324       /* Get complete list of columns for color on each processor */
2325       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2326       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2327       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2328     } else if (ctype == IS_COLORING_GHOSTED) {
2329       /* Determine local number of columns of this color on this process, including ghost points */
2330       nctot = n;
2331       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2332       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2333     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2334 
2335     /*
2336        Mark all rows affected by these columns
2337     */
2338     /* Temporary option to allow for debugging/testing */
2339     flg  = PETSC_FALSE;
2340     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2341     if (!flg) {/*-----------------------------------------------------------------------------*/
2342       /* crude, fast version */
2343       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
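           /* rowhit[row] is set to col+1 when block row 'row' has a nonzero in block column
              'col' of the current color; the +1 distinguishes a hit from the zeroed initial state. */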
2344       /* loop over columns*/
2345       for (j=0; j<nctot; j++) {
2346         if (ctype == IS_COLORING_GHOSTED) {
2347           col = ltog[cols[j]];
2348         } else {
2349           col  = cols[j];
2350         }
2351         if (col >= cstart && col < cend) {
2352           /* column is in diagonal block of matrix */
2353           rows = A_cj + A_ci[col-cstart];
2354           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2355         } else {
2356 #if defined (PETSC_USE_CTABLE)
2357           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2358           colb --;
2359 #else
2360           colb = baij->colmap[col] - 1;
2361 #endif
2362           if (colb == -1) {
2363             m = 0;
2364           } else {
2365             colb = colb/bs;
2366             rows = B_cj + B_ci[colb];
2367             m    = B_ci[colb+1] - B_ci[colb];
2368           }
2369         }
2370         /* loop over the rows of this column, marking each as hit in rowhit */
2371         for (k=0; k<m; k++) {
2372           rowhit[*rows++] = col + 1;
2373         }
2374       }
2375 
2376       /* count the number of hits */
2377       nrows = 0;
2378       for (j=0; j<M; j++) {
2379         if (rowhit[j]) nrows++;
2380       }
2381       c->nrows[i]         = nrows;
2382       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2383       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2384       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2385       nrows = 0;
2386       for (j=0; j<M; j++) {
2387         if (rowhit[j]) {
2388           c->rows[i][nrows]           = j;
2389           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2390           nrows++;
2391         }
2392       }
2393     } else {/*-------------------------------------------------------------------------------*/
2394       /* slow version, using rowhit as a linked list */
2395       PetscInt currentcol,fm,mfm;
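           /* rowhit is used as a sorted singly linked list of the hit rows: rowhit[M] is the
              head and each entry points to the next larger hit row, terminating back at M. */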
2396       rowhit[M] = M;
2397       nrows     = 0;
2398       /* loop over columns*/
2399       for (j=0; j<nctot; j++) {
2400         if (ctype == IS_COLORING_GHOSTED) {
2401           col = ltog[cols[j]];
2402         } else {
2403           col  = cols[j];
2404         }
2405         if (col >= cstart && col < cend) {
2406           /* column is in diagonal block of matrix */
2407           rows = A_cj + A_ci[col-cstart];
2408           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2409         } else {
2410 #if defined (PETSC_USE_CTABLE)
2411           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2412           colb --;
2413 #else
2414           colb = baij->colmap[col] - 1;
2415 #endif
2416           if (colb == -1) {
2417             m = 0;
2418           } else {
2419             colb = colb/bs;
2420             rows = B_cj + B_ci[colb];
2421             m    = B_ci[colb+1] - B_ci[colb];
2422           }
2423         }
2424 
2425         /* loop over the rows of this column, inserting each into the rowhit linked list */
2426         fm    = M; /* fm points to first entry in linked list */
2427         for (k=0; k<m; k++) {
2428           currentcol = *rows++;
2429           /* is it already in the list? */
2430           do {
2431             mfm  = fm;
2432             fm   = rowhit[fm];
2433           } while (fm < currentcol);
2434           /* not in list so add it */
2435           if (fm != currentcol) {
2436             nrows++;
2437             columnsforrow[currentcol] = col;
2438             /* next three lines insert new entry into linked list */
2439             rowhit[mfm]               = currentcol;
2440             rowhit[currentcol]        = fm;
2441             fm                        = currentcol;
2442             /* fm points to present position in list since we know the columns are sorted */
2443           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2444         }
2445       }
2446       c->nrows[i]         = nrows;
2447       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2448       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2449       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2450       /* now store the linked list of rows into c->rows[i] */
2451       nrows = 0;
2452       fm    = rowhit[M];
2453       do {
2454         c->rows[i][nrows]            = fm;
2455         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2456         fm                           = rowhit[fm];
2457       } while (fm < M);
2458     } /* ---------------------------------------------------------------------------------------*/
2459     ierr = PetscFree(cols);CHKERRQ(ierr);
2460   }
2461 
2462   /* Optimize by adding the vscale and vscaleforrow[][] fields */
2463   /*
2464        vscale will contain the on-process ("diagonal") scalings followed by the off-process ones
2465   */
2466   if (ctype == IS_COLORING_GLOBAL) {
2467     PetscInt *garray;
2468     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2469     for (i=0; i<baij->B->cmap->n/bs; i++) {
2470       for (j=0; j<bs; j++) {
2471         garray[i*bs+j] = bs*baij->garray[i]+j;
2472       }
2473     }
2474     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2475     ierr = PetscFree(garray);CHKERRQ(ierr);
2476     CHKMEMQ;
2477     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2478     for (k=0; k<c->ncolors; k++) {
2479       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2480       for (l=0; l<c->nrows[k]; l++) {
2481         col = c->columnsforrow[k][l];
2482         if (col >= cstart && col < cend) {
2483           /* column is in diagonal block of matrix */
2484           colb = col - cstart;
2485         } else {
2486           /* column  is in "off-processor" part */
2487 #if defined (PETSC_USE_CTABLE)
2488           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2489           colb --;
2490 #else
2491           colb = baij->colmap[col] - 1;
2492 #endif
2493           colb = colb/bs;
2494           colb += cend - cstart;
2495         }
2496         c->vscaleforrow[k][l] = colb;
2497       }
2498     }
2499   } else if (ctype == IS_COLORING_GHOSTED) {
2500     /* Get gtol mapping */
2501     PetscInt N = mat->cmap->N, *gtol;
2502     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2503     for (i=0; i<N; i++) gtol[i] = -1;
2504     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2505 
2506     c->vscale = 0; /* will be created in MatFDColoringApply() */
2507     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2508     for (k=0; k<c->ncolors; k++) {
2509       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2510       for (l=0; l<c->nrows[k]; l++) {
2511         col = c->columnsforrow[k][l];      /* global column index */
2512         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2513       }
2514     }
2515     ierr = PetscFree(gtol);CHKERRQ(ierr);
2516   }
2517   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2518 
2519   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2520   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2521   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2522   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2523   CHKMEMQ;
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 #undef __FUNCT__
2528 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2529 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2530 {
2531   Mat            B;
2532   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2533   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2534   Mat_SeqAIJ     *b;
2535   PetscErrorCode ierr;
2536   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2537   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2538   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2539 
2540   PetscFunctionBegin;
2541   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2542   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
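       /* Strategy: each process contributes the row lengths and column indices of its block
          rows; these are allgathered so that every process can assemble the complete block
          nonzero structure as a sequential AIJ matrix.  Numerical values are never sent. */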
2543 
2544   /* ----------------------------------------------------------------
2545      Tell every processor the number of nonzeros per row
2546   */
2547   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2548   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2549     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2550   }
2551   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2552   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2553   displs     = recvcounts + size;
2554   for (i=0; i<size; i++) {
2555     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2556     displs[i]     = A->rmap->range[i]/bs;
2557   }
2558 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2559   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2560 #else
2561   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2562 #endif
2563   /* ---------------------------------------------------------------
2564      Create the sequential matrix of the same type as the local block diagonal
2565   */
2566   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2567   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2568   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2569   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2570   b = (Mat_SeqAIJ *)B->data;
2571 
2572   /*--------------------------------------------------------------------
2573     Copy my part of matrix column indices over
2574   */
2575   sendcount  = ad->nz + bd->nz;
2576   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2577   a_jsendbuf = ad->j;
2578   b_jsendbuf = bd->j;
2579   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2580   cnt        = 0;
2581   for (i=0; i<n; i++) {
2582 
2583     /* put in lower diagonal portion */
2584     m = bd->i[i+1] - bd->i[i];
2585     while (m > 0) {
2586       /* is it above diagonal (in bd (compressed) numbering) */
2587       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2588       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2589       m--;
2590     }
2591 
2592     /* put in diagonal portion */
2593     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2594       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2595     }
2596 
2597     /* put in upper diagonal portion */
2598     while (m-- > 0) {
2599       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2600     }
2601   }
2602   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2603 
2604   /*--------------------------------------------------------------------
2605     Gather all column indices to all processors
2606   */
2607   for (i=0; i<size; i++) {
2608     recvcounts[i] = 0;
2609     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2610       recvcounts[i] += lens[j];
2611     }
2612   }
2613   displs[0]  = 0;
2614   for (i=1; i<size; i++) {
2615     displs[i] = displs[i-1] + recvcounts[i-1];
2616   }
2617 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2618   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2619 #else
2620   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2621 #endif
2622   /*--------------------------------------------------------------------
2623     Assemble the matrix into usable form (note the numerical values are not yet set)
2624   */
2625   /* set the b->ilen (length of each row) values */
2626   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2627   /* set the b->i indices */
2628   b->i[0] = 0;
2629   for (i=1; i<=A->rmap->N/bs; i++) {
2630     b->i[i] = b->i[i-1] + lens[i-1];
2631   }
2632   ierr = PetscFree(lens);CHKERRQ(ierr);
2633   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2634   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2635   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2636 
2637   if (A->symmetric) {
2638     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2639   } else if (A->hermitian) {
2640     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2641   } else if (A->structurally_symmetric) {
2642     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2643   }
2644   *newmat = B;
2645   PetscFunctionReturn(0);
2646 }
2647 
2648 #undef __FUNCT__
2649 #define __FUNCT__ "MatSOR_MPIBAIJ"
2650 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2651 {
2652   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2653   PetscErrorCode ierr;
2654   Vec            bb1 = 0;
2655 
2656   PetscFunctionBegin;
2657   if (flag == SOR_APPLY_UPPER) {
2658     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2659     PetscFunctionReturn(0);
2660   }
2661 
2662   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2663     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2664   }
2665 
2666   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2667     if (flag & SOR_ZERO_INITIAL_GUESS) {
2668       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2669       its--;
2670     }
2671 
2672     while (its--) {
2673       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2674       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2675 
2676       /* update rhs: bb1 = bb - B*x */
2677       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2678       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2679 
2680       /* local sweep */
2681       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2682     }
2683   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2684     if (flag & SOR_ZERO_INITIAL_GUESS) {
2685       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2686       its--;
2687     }
2688     while (its--) {
2689       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2690       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2691 
2692       /* update rhs: bb1 = bb - B*x */
2693       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2694       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2695 
2696       /* local sweep */
2697       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2698     }
2699   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2700     if (flag & SOR_ZERO_INITIAL_GUESS) {
2701       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2702       its--;
2703     }
2704     while (its--) {
2705       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2706       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2707 
2708       /* update rhs: bb1 = bb - B*x */
2709       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2710       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2711 
2712       /* local sweep */
2713       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2714     }
2715   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2716 
2717   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2718   PetscFunctionReturn(0);
2719 }
2720 
2721 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2722 
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2725 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2726 {
2727   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2728   PetscErrorCode ierr;
2729 
2730   PetscFunctionBegin;
2731   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2732   PetscFunctionReturn(0);
2733 }
2734 
2735 
2736 /* -------------------------------------------------------------------*/
2737 static struct _MatOps MatOps_Values = {
2738        MatSetValues_MPIBAIJ,
2739        MatGetRow_MPIBAIJ,
2740        MatRestoreRow_MPIBAIJ,
2741        MatMult_MPIBAIJ,
2742 /* 4*/ MatMultAdd_MPIBAIJ,
2743        MatMultTranspose_MPIBAIJ,
2744        MatMultTransposeAdd_MPIBAIJ,
2745        0,
2746        0,
2747        0,
2748 /*10*/ 0,
2749        0,
2750        0,
2751        MatSOR_MPIBAIJ,
2752        MatTranspose_MPIBAIJ,
2753 /*15*/ MatGetInfo_MPIBAIJ,
2754        MatEqual_MPIBAIJ,
2755        MatGetDiagonal_MPIBAIJ,
2756        MatDiagonalScale_MPIBAIJ,
2757        MatNorm_MPIBAIJ,
2758 /*20*/ MatAssemblyBegin_MPIBAIJ,
2759        MatAssemblyEnd_MPIBAIJ,
2760        MatSetOption_MPIBAIJ,
2761        MatZeroEntries_MPIBAIJ,
2762 /*24*/ MatZeroRows_MPIBAIJ,
2763        0,
2764        0,
2765        0,
2766        0,
2767 /*29*/ MatSetUp_MPIBAIJ,
2768        0,
2769        0,
2770        0,
2771        0,
2772 /*34*/ MatDuplicate_MPIBAIJ,
2773        0,
2774        0,
2775        0,
2776        0,
2777 /*39*/ MatAXPY_MPIBAIJ,
2778        MatGetSubMatrices_MPIBAIJ,
2779        MatIncreaseOverlap_MPIBAIJ,
2780        MatGetValues_MPIBAIJ,
2781        MatCopy_MPIBAIJ,
2782 /*44*/ 0,
2783        MatScale_MPIBAIJ,
2784        0,
2785        0,
2786        0,
2787 /*49*/ 0,
2788        0,
2789        0,
2790        0,
2791        0,
2792 /*54*/ MatFDColoringCreate_MPIBAIJ,
2793        0,
2794        MatSetUnfactored_MPIBAIJ,
2795        MatPermute_MPIBAIJ,
2796        MatSetValuesBlocked_MPIBAIJ,
2797 /*59*/ MatGetSubMatrix_MPIBAIJ,
2798        MatDestroy_MPIBAIJ,
2799        MatView_MPIBAIJ,
2800        0,
2801        0,
2802 /*64*/ 0,
2803        0,
2804        0,
2805        0,
2806        0,
2807 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2808        0,
2809        0,
2810        0,
2811        0,
2812 /*74*/ 0,
2813        MatFDColoringApply_BAIJ,
2814        0,
2815        0,
2816        0,
2817 /*79*/ 0,
2818        0,
2819        0,
2820        0,
2821        MatLoad_MPIBAIJ,
2822 /*84*/ 0,
2823        0,
2824        0,
2825        0,
2826        0,
2827 /*89*/ 0,
2828        0,
2829        0,
2830        0,
2831        0,
2832 /*94*/ 0,
2833        0,
2834        0,
2835        0,
2836        0,
2837 /*99*/ 0,
2838        0,
2839        0,
2840        0,
2841        0,
2842 /*104*/0,
2843        MatRealPart_MPIBAIJ,
2844        MatImaginaryPart_MPIBAIJ,
2845        0,
2846        0,
2847 /*109*/0,
2848        0,
2849        0,
2850        0,
2851        0,
2852 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2853        0,
2854        MatGetGhosts_MPIBAIJ,
2855        0,
2856        0,
2857 /*119*/0,
2858        0,
2859        0,
2860        0,
2861        0,
2862 /*124*/0,
2863        0,
2864        MatInvertBlockDiagonal_MPIBAIJ
2865 };
2866 
2867 EXTERN_C_BEGIN
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2870 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2871 {
2872   PetscFunctionBegin;
2873   *a = ((Mat_MPIBAIJ *)A->data)->A;
2874   PetscFunctionReturn(0);
2875 }
2876 EXTERN_C_END
2877 
2878 EXTERN_C_BEGIN
2879 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2880 EXTERN_C_END
2881 
2882 EXTERN_C_BEGIN
2883 #undef __FUNCT__
2884 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2885 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2886 {
2887   PetscInt       m,rstart,cstart,cend;
2888   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2889   const PetscInt *JJ=0;
2890   PetscScalar    *values=0;
2891   PetscErrorCode ierr;
2892 
2893   PetscFunctionBegin;
2894   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2895   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2896   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2897   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2898   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2899   m      = B->rmap->n/bs;
2900   rstart = B->rmap->rstart/bs;
2901   cstart = B->cmap->rstart/bs;
2902   cend   = B->cmap->rend/bs;
2903 
2904   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2905   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2906   for (i=0; i<m; i++) {
2907     nz = ii[i+1] - ii[i];
2908     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2909     nz_max = PetscMax(nz_max,nz);
2910     JJ  = jj + ii[i];
2911     for (j=0; j<nz; j++) {
2912       if (*JJ >= cstart) break;
2913       JJ++;
2914     }
2915     d = 0;
2916     for (; j<nz; j++) {
2917       if (*JJ++ >= cend) break;
2918       d++;
2919     }
2920     d_nnz[i] = d;
2921     o_nnz[i] = nz - d;
2922   }
2923   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2924   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2925 
2926   values = (PetscScalar*)V;
2927   if (!values) {
2928     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2929     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2930   }
2931   for (i=0; i<m; i++) {
2932     PetscInt          row    = i + rstart;
2933     PetscInt          ncols  = ii[i+1] - ii[i];
2934     const PetscInt    *icols = jj + ii[i];
2935     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2936     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2937   }
2938 
2939   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2940   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2941   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2942   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2943   PetscFunctionReturn(0);
2944 }
2945 EXTERN_C_END
2946 
2947 #undef __FUNCT__
2948 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2949 /*@C
2950    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2951    (block compressed row).
2952 
2953    Collective on MPI_Comm
2954 
2955    Input Parameters:
2956 +  A - the matrix
2957 .  bs - the block size
2958 .  i - the indices into j for the start of each local block row (starts with zero)
2959 .  j - the block column indices for each local block row (starts with zero); these must be sorted within each row
2960 -  v - optional values in the matrix (bs*bs values for each block) or PETSC_NULL
2961 
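   Example usage, a minimal sketch on a single process with bs = 2 and two local block rows
   (the indices are purely illustrative, declarations and error checking are abbreviated, and
   v is PETSC_NULL so only the nonzero pattern is preallocated and assembled):

.vb
   Mat       B;
   PetscInt  bs   = 2;
   PetscInt  ii[] = {0,1,3};   /* block row 0 has 1 block, block row 1 has 2 blocks */
   PetscInt  jj[] = {0,0,1};   /* global block column of each block, sorted within each row */

   ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);  /* 2 block rows of size bs */
   ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr);
.ve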
2962    Level: developer
2963 
2964 .keywords: matrix, aij, compressed row, sparse, parallel
2965 
2966 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2967 @*/
2968 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2969 {
2970   PetscErrorCode ierr;
2971 
2972   PetscFunctionBegin;
2973   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2974   PetscValidType(B,1);
2975   PetscValidLogicalCollectiveInt(B,bs,2);
2976   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 EXTERN_C_BEGIN
2981 #undef __FUNCT__
2982 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2983 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2984 {
2985   Mat_MPIBAIJ    *b;
2986   PetscErrorCode ierr;
2987   PetscInt       i;
2988   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
2989 
2990   PetscFunctionBegin;
2991   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
2992   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
2993 
2994   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2995   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2996   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2997   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2998 
2999   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3000   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3001   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3002   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3003   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3004 
3005   if (d_nnz) {
3006     for (i=0; i<B->rmap->n/bs; i++) {
3007       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3008     }
3009   }
3010   if (o_nnz) {
3011     for (i=0; i<B->rmap->n/bs; i++) {
3012       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3013     }
3014   }
3015 
3016   b = (Mat_MPIBAIJ*)B->data;
3017   b->bs2 = bs*bs;
3018   b->mbs = B->rmap->n/bs;
3019   b->nbs = B->cmap->n/bs;
3020   b->Mbs = B->rmap->N/bs;
3021   b->Nbs = B->cmap->N/bs;
3022 
3023   for (i=0; i<=b->size; i++) {
3024     b->rangebs[i] = B->rmap->range[i]/bs;
3025   }
3026   b->rstartbs = B->rmap->rstart/bs;
3027   b->rendbs   = B->rmap->rend/bs;
3028   b->cstartbs = B->cmap->rstart/bs;
3029   b->cendbs   = B->cmap->rend/bs;
3030 
3031   if (!B->preallocated) {
3032     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3033     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3034     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3035     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3036     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3037     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3038     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3039     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3040     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3041   }
3042 
3043   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3044   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3045   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3046   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3047   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3048   B->preallocated = PETSC_TRUE;
3049   PetscFunctionReturn(0);
3050 }
3051 EXTERN_C_END
3052 
3053 EXTERN_C_BEGIN
3054 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3055 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3056 EXTERN_C_END
3057 
3058 
3059 EXTERN_C_BEGIN
3060 #undef __FUNCT__
3061 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3062 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3063 {
3064   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3065   PetscErrorCode ierr;
3066   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3067   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3068   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3069 
3070   PetscFunctionBegin;
3071   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3072   ii[0] = 0;
3073   CHKMEMQ;
3074   for (i=0; i<M; i++) {
3075     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3076     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3077     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3078     /* remove one from the count if the matrix has a diagonal entry in this row */
3079     for (j=id[i]; j<id[i+1]; j++) {
3080       if (jd[j] == i) {ii[i+1]--;break;}
3081     }
3082   CHKMEMQ;
3083   }
3084   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3085   cnt = 0;
3086   for (i=0; i<M; i++) {
3087     for (j=io[i]; j<io[i+1]; j++) {
3088       if (garray[jo[j]] > rstart) break;
3089       jj[cnt++] = garray[jo[j]];
3090   CHKMEMQ;
3091     }
3092     for (k=id[i]; k<id[i+1]; k++) {
3093       if (jd[k] != i) {
3094         jj[cnt++] = rstart + jd[k];
3095   CHKMEMQ;
3096       }
3097     }
3098     for (;j<io[i+1]; j++) {
3099       jj[cnt++] = garray[jo[j]];
3100   CHKMEMQ;
3101     }
3102   }
3103   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3104   PetscFunctionReturn(0);
3105 }
3106 EXTERN_C_END
3107 
3108 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3109 EXTERN_C_BEGIN
3110 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3111 EXTERN_C_END
3112 
3113 EXTERN_C_BEGIN
3114 #undef __FUNCT__
3115 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3116 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3117 {
3118   PetscErrorCode ierr;
3119   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3120   Mat            B;
3121   Mat_MPIAIJ     *b;
3122 
3123   PetscFunctionBegin;
3124   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3125 
3126   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3127   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3128   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3129   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3130   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3131   b = (Mat_MPIAIJ*) B->data;
3132 
3133   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3134   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3135   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3136   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3137   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3138   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3139   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3140   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3141   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3142   if (reuse == MAT_REUSE_MATRIX) {
3143     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3144   } else {
3145    *newmat = B;
3146   }
3147   PetscFunctionReturn(0);
3148 }
3149 EXTERN_C_END
3150 
3151 EXTERN_C_BEGIN
3152 #if defined(PETSC_HAVE_MUMPS)
3153 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3154 #endif
3155 EXTERN_C_END
3156 
3157 /*MC
3158    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3159 
3160    Options Database Keys:
3161 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3162 . -mat_block_size <bs> - set the blocksize used to store the matrix
3163 - -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory
3164 
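   For example, the type can be selected at runtime as in the following minimal sketch (M, N, bs,
   d_nz and o_nz are assumed to be defined elsewhere; giving -mat_type mpibaij on the command line
   has the same effect as the MatSetType() call):

.vb
   Mat A;
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve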
3165   Level: beginner
3166 
3167 .seealso: MatCreateMPIBAIJ
3168 M*/
3169 
3170 EXTERN_C_BEGIN
3171 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3172 EXTERN_C_END
3173 
3174 EXTERN_C_BEGIN
3175 #undef __FUNCT__
3176 #define __FUNCT__ "MatCreate_MPIBAIJ"
3177 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3178 {
3179   Mat_MPIBAIJ    *b;
3180   PetscErrorCode ierr;
3181   PetscBool      flg;
3182 
3183   PetscFunctionBegin;
3184   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3185   B->data = (void*)b;
3186 
3187   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3188   B->assembled  = PETSC_FALSE;
3189 
3190   B->insertmode = NOT_SET_VALUES;
3191   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3192   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3193 
3194   /* build local table of row and column ownerships */
3195   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3196 
3197   /* build cache for off array entries formed */
3198   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3199   b->donotstash  = PETSC_FALSE;
3200   b->colmap      = PETSC_NULL;
3201   b->garray      = PETSC_NULL;
3202   b->roworiented = PETSC_TRUE;
3203 
3204   /* stuff used in block assembly */
3205   b->barray       = 0;
3206 
3207   /* stuff used for matrix vector multiply */
3208   b->lvec         = 0;
3209   b->Mvctx        = 0;
3210 
3211   /* stuff for MatGetRow() */
3212   b->rowindices   = 0;
3213   b->rowvalues    = 0;
3214   b->getrowactive = PETSC_FALSE;
3215 
3216   /* hash table stuff */
3217   b->ht           = 0;
3218   b->hd           = 0;
3219   b->ht_size      = 0;
3220   b->ht_flag      = PETSC_FALSE;
3221   b->ht_fact      = 0;
3222   b->ht_total_ct  = 0;
3223   b->ht_insert_ct = 0;
3224 
3225   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3226   b->ijonly       = PETSC_FALSE;
3227 
3228   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3229     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3230     if (flg) {
3231       PetscReal fact = 1.39;
3232       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3233       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3234       if (fact <= 1.0) fact = 1.39;
3235       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3236       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3237     }
3238   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3239 
3240 #if defined(PETSC_HAVE_MUMPS)
3241   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3242 #endif
3243   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3244                                      "MatConvert_MPIBAIJ_MPIAdj",
3245                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3246   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3247                                      "MatConvert_MPIBAIJ_MPIAIJ",
3248                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3249   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3250                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3251                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3252   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3253                                      "MatStoreValues_MPIBAIJ",
3254                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3255   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3256                                      "MatRetrieveValues_MPIBAIJ",
3257                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3258   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3259                                      "MatGetDiagonalBlock_MPIBAIJ",
3260                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3261   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3262                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3263                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3264   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3265                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3266                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3267   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3268                                      "MatDiagonalScaleLocal_MPIBAIJ",
3269                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3270   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3271                                      "MatSetHashTableFactor_MPIBAIJ",
3272                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3273   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3274                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3275                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3276   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3277   PetscFunctionReturn(0);
3278 }
3279 EXTERN_C_END
3280 
3281 /*MC
3282    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3283 
3284    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3285    and MATMPIBAIJ otherwise.
3286 
3287    Options Database Keys:
3288 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3289 
3290   Level: beginner
3291 
3292 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3293 M*/
3294 
3295 #undef __FUNCT__
3296 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3297 /*@C
3298    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3299    (block compressed row).  For good matrix assembly performance
3300    the user should preallocate the matrix storage by setting the parameters
3301    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3302    performance can be increased by more than a factor of 50.
3303 
3304    Collective on Mat
3305 
3306    Input Parameters:
3307 +  A - the matrix
3308 .  bs   - size of block
3309 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3310            submatrix  (same for all local rows)
3311 .  d_nnz - array containing the number of block nonzeros in the various block rows
3312            in the diagonal portion of the local submatrix (possibly different for each block
3313            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3314            set it even if it is zero.
3315 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3316            submatrix (same for all local rows).
3317 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3318            off-diagonal portion of the local submatrix (possibly different for
3319            each block row) or PETSC_NULL.
3320 
3321    If the *_nnz parameter is given then the *_nz parameter is ignored
3322 
3323    Options Database Keys:
3324 +   -mat_block_size - size of the blocks to use
3325 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory
3326 
3327    Notes:
3328    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3329    then it must be used on all processors that share the object for that argument.
3330 
3331    Storage Information:
3332    For a square global matrix we define each processor's diagonal portion
3333    to be its local rows and the corresponding columns (a square submatrix);
3334    each processor's off-diagonal portion encompasses the remainder of the
3335    local matrix (a rectangular submatrix).
3336 
3337    The user can specify preallocated storage for the diagonal part of
3338    the local submatrix with either d_nz or d_nnz (not both).  Set
3339    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3340    memory allocation.  Likewise, specify preallocated storage for the
3341    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3342 
3343    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3344    the figure below we depict these three local rows and all columns (0-11).
3345 
3346 .vb
3347            0 1 2 3 4 5 6 7 8 9 10 11
3348           -------------------
3349    row 3  |  o o o d d d o o o o o o
3350    row 4  |  o o o d d d o o o o o o
3351    row 5  |  o o o d d d o o o o o o
3352           -------------------
3353 .ve
3354 
3355    Thus, any entries in the d locations are stored in the d (diagonal)
3356    submatrix, and any entries in the o locations are stored in the
3357    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3358    stored simply in the MATSEQBAIJ format for compressed row storage.
3359 
3360    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3361    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3362    In general, for PDE problems in which most nonzeros are near the diagonal,
3363    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3364    or you will get TERRIBLE performance; see the users' manual chapter on
3365    matrices.
3366 
3367    You can call MatGetInfo() to get information on how effective the preallocation was;
3368    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3369    You can also run with the option -info and look for messages with the string
3370    malloc in them to see if additional memory allocation was needed.
3371 
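   Example usage, a minimal sketch in which each process owns two block rows of size bs = 2
   (the per-row counts are purely illustrative and error checking is abbreviated):

.vb
   Mat      A;
   PetscInt bs      = 2;
   PetscInt d_nnz[] = {2,1};   /* block nonzeros per local block row, diagonal portion */
   PetscInt o_nnz[] = {1,0};   /* block nonzeros per local block row, off-diagonal portion */

   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve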
3372    Level: intermediate
3373 
3374 .keywords: matrix, block, aij, compressed row, sparse, parallel
3375 
3376 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3377 @*/
3378 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3379 {
3380   PetscErrorCode ierr;
3381 
3382   PetscFunctionBegin;
3383   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3384   PetscValidType(B,1);
3385   PetscValidLogicalCollectiveInt(B,bs,2);
3386   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3387   PetscFunctionReturn(0);
3388 }
3389 
3390 #undef __FUNCT__
3391 #define __FUNCT__ "MatCreateBAIJ"
3392 /*@C
3393    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3394    (block compressed row).  For good matrix assembly performance
3395    the user should preallocate the matrix storage by setting the parameters
3396    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3397    performance can be increased by more than a factor of 50.
3398 
3399    Collective on MPI_Comm
3400 
3401    Input Parameters:
3402 +  comm - MPI communicator
3403 .  bs   - size of block
3404 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3405            This value should be the same as the local size used in creating the
3406            y vector for the matrix-vector product y = Ax.
3407 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3408            This value should be the same as the local size used in creating the
3409            x vector for the matrix-vector product y = Ax.
3410 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3411 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3412 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3413            submatrix  (same for all local rows)
3414 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3415            in the diagonal portion of the local submatrix (possibly different for each block
3416            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3417            and set it even if it is zero.
3418 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3419            submatrix (same for all local rows).
3420 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3421            off-diagonal portion of the local submatrix (possibly different for
3422            each block row) or PETSC_NULL.
3423 
3424    Output Parameter:
3425 .  A - the matrix
3426 
3427    Options Database Keys:
3428 +   -mat_block_size - size of the blocks to use
3429 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory
3430 
3431    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3432    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3433    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3434 
3435    Notes:
3436    If the *_nnz parameter is given then the *_nz parameter is ignored
3437 
3438    A nonzero block is any block that has 1 or more nonzeros in it
3439 
3440    The user MUST specify either the local or global matrix dimensions
3441    (possibly both).
3442 
3443    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3444    then it must be used on all processors that share the object for that argument.
3445 
3446    Storage Information:
3447    For a square global matrix we define each processor's diagonal portion
3448    to be its local rows and the corresponding columns (a square submatrix);
3449    each processor's off-diagonal portion encompasses the remainder of the
3450    local matrix (a rectangular submatrix).
3451 
3452    The user can specify preallocated storage for the diagonal part of
3453    the local submatrix with either d_nz or d_nnz (not both).  Set
3454    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3455    memory allocation.  Likewise, specify preallocated storage for the
3456    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3457 
3458    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3459    the figure below we depict these three local rows and all columns (0-11).
3460 
3461 .vb
3462            0 1 2 3 4 5 6 7 8 9 10 11
3463           -------------------
3464    row 3  |  o o o d d d o o o o o o
3465    row 4  |  o o o d d d o o o o o o
3466    row 5  |  o o o d d d o o o o o o
3467           -------------------
3468 .ve
3469 
3470    Thus, any entries in the d locations are stored in the d (diagonal)
3471    submatrix, and any entries in the o locations are stored in the
3472    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3473    stored simply in the MATSEQBAIJ format for compressed row storage.
3474 
3475    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3476    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3477    In general, for PDE problems in which most nonzeros are near the diagonal,
3478    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3479    or you will get TERRIBLE performance; see the users' manual chapter on
3480    matrices.
3481 
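   Example usage, a minimal sketch creating an 8 x 8 matrix with block size 2 and letting PETSc
   choose the row distribution (the constant per-row counts are purely illustrative):

.vb
   Mat A;
   ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve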
3482    Level: intermediate
3483 
3484 .keywords: matrix, block, aij, compressed row, sparse, parallel
3485 
3486 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3487 @*/
3488 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3489 {
3490   PetscErrorCode ierr;
3491   PetscMPIInt    size;
3492 
3493   PetscFunctionBegin;
3494   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3495   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3496   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3497   if (size > 1) {
3498     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3499     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3500   } else {
3501     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3502     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3503   }
3504   PetscFunctionReturn(0);
3505 }
3506 
3507 #undef __FUNCT__
3508 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3509 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3510 {
3511   Mat            mat;
3512   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3513   PetscErrorCode ierr;
3514   PetscInt       len=0;
3515 
3516   PetscFunctionBegin;
3517   *newmat       = 0;
3518   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3519   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3520   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3521   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3522 
3523   mat->factortype   = matin->factortype;
3524   mat->preallocated = PETSC_TRUE;
3525   mat->assembled    = PETSC_TRUE;
3526   mat->insertmode   = NOT_SET_VALUES;
3527 
3528   a      = (Mat_MPIBAIJ*)mat->data;
3529   mat->rmap->bs  = matin->rmap->bs;
3530   a->bs2   = oldmat->bs2;
3531   a->mbs   = oldmat->mbs;
3532   a->nbs   = oldmat->nbs;
3533   a->Mbs   = oldmat->Mbs;
3534   a->Nbs   = oldmat->Nbs;
3535 
3536   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3537   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3538 
3539   a->size         = oldmat->size;
3540   a->rank         = oldmat->rank;
3541   a->donotstash   = oldmat->donotstash;
3542   a->roworiented  = oldmat->roworiented;
3543   a->rowindices   = 0;
3544   a->rowvalues    = 0;
3545   a->getrowactive = PETSC_FALSE;
3546   a->barray       = 0;
3547   a->rstartbs     = oldmat->rstartbs;
3548   a->rendbs       = oldmat->rendbs;
3549   a->cstartbs     = oldmat->cstartbs;
3550   a->cendbs       = oldmat->cendbs;
3551 
3552   /* hash table stuff */
3553   a->ht           = 0;
3554   a->hd           = 0;
3555   a->ht_size      = 0;
3556   a->ht_flag      = oldmat->ht_flag;
3557   a->ht_fact      = oldmat->ht_fact;
3558   a->ht_total_ct  = 0;
3559   a->ht_insert_ct = 0;
3560 
3561   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3562   if (oldmat->colmap) {
3563 #if defined (PETSC_USE_CTABLE)
3564   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3565 #else
3566   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3567   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3568   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3569 #endif
3570   } else a->colmap = 0;
3571 
3572   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3573     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3574     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3575     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3576   } else a->garray = 0;
3577 
3578   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3579   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3580   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3581   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3582   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3583 
3584   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3585   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3586   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3587   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3588   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3589   *newmat = mat;
3590   PetscFunctionReturn(0);
3591 }
3592 
3593 #undef __FUNCT__
3594 #define __FUNCT__ "MatLoad_MPIBAIJ"
3595 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3596 {
3597   PetscErrorCode ierr;
3598   int            fd;
3599   PetscInt       i,nz,j,rstart,rend;
3600   PetscScalar    *vals,*buf;
3601   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3602   MPI_Status     status;
3603   PetscMPIInt    rank,size,maxnz;
3604   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3605   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3606   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3607   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3608   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3609   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3610 
3611   PetscFunctionBegin;
3612   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3613     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3614   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3615 
3616   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3617   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3618   if (!rank) {
3619     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3620     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3621     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3622   }
3623 
3624   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3625 
3626   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3627   M = header[1]; N = header[2];
3628 
3629   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3630   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3631   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3632 
3633   /* If global sizes are set, check if they are consistent with that given in the file */
3634   if (sizesset) {
3635     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3636   }
3637   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3638   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3639 
3640   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3641 
3642   /*
3643      This code adds extra rows to make sure the number of rows is
3644      divisible by the blocksize
3645   */
3646   Mbs        = M/bs;
3647   extra_rows = bs - M + bs*Mbs;
3648   if (extra_rows == bs) extra_rows = 0;
3649   else                  Mbs++;
3650   if (extra_rows && !rank) {
3651     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3652   }
3653 
3654   /* determine ownership of all rows */
3655   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3656     mbs        = Mbs/size + ((Mbs % size) > rank);
3657     m          = mbs*bs;
3658   } else { /* User set */
3659     m          = newmat->rmap->n;
3660     mbs        = m/bs;
3661   }
3662   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3663   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3664 
3665   /* process 0 needs enough room for process with most rows */
3666   if (!rank) {
3667     mmax = rowners[1];
3668     for (i=2; i<=size; i++) {
3669       mmax = PetscMax(mmax,rowners[i]);
3670     }
3671     mmax*=bs;
3672   } else mmax = m;
3673 
3674   rowners[0] = 0;
3675   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3676   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3677   rstart = rowners[rank];
3678   rend   = rowners[rank+1];
3679 
3680   /* distribute row lengths to all processors */
3681   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3682   if (!rank) {
3683     mend = m;
3684     if (size == 1) mend = mend - extra_rows;
3685     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3686     for (j=mend; j<m; j++) locrowlens[j] = 1;
3687     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3688     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3689     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3690     for (j=0; j<m; j++) {
3691       procsnz[0] += locrowlens[j];
3692     }
3693     for (i=1; i<size; i++) {
3694       mend = browners[i+1] - browners[i];
3695       if (i == size-1) mend = mend - extra_rows;
3696       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3697       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3698       /* calculate the number of nonzeros on each processor */
3699       for (j=0; j<browners[i+1]-browners[i]; j++) {
3700         procsnz[i] += rowlengths[j];
3701       }
3702       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3703     }
3704     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3705   } else {
3706     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3707   }
3708 
3709   if (!rank) {
3710     /* determine max buffer needed and allocate it */
3711     maxnz = procsnz[0];
3712     for (i=1; i<size; i++) {
3713       maxnz = PetscMax(maxnz,procsnz[i]);
3714     }
3715     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3716 
3717     /* read in my part of the matrix column indices  */
3718     nz     = procsnz[0];
3719     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3720     mycols = ibuf;
3721     if (size == 1)  nz -= extra_rows;
3722     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3723     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3724 
3725     /* read in the other processors' parts (except the last one) and ship them off */
3726     for (i=1; i<size-1; i++) {
3727       nz   = procsnz[i];
3728       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3729       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3730     }
3731     /* read in the stuff for the last proc */
3732     if (size != 1) {
3733       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3734       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3735       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3736       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3737     }
3738     ierr = PetscFree(cols);CHKERRQ(ierr);
3739   } else {
3740     /* determine buffer space needed for message */
3741     nz = 0;
3742     for (i=0; i<m; i++) {
3743       nz += locrowlens[i];
3744     }
3745     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3746     mycols = ibuf;
3747     /* receive message of column indices*/
3748     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3749     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3750     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3751   }
3752 
3753   /* loop over local rows, determining number of off diagonal entries */
3754   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3755   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3756   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3757   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3758   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3759   rowcount = 0; nzcount = 0;
3760   for (i=0; i<mbs; i++) {
3761     dcount  = 0;
3762     odcount = 0;
3763     for (j=0; j<bs; j++) {
3764       kmax = locrowlens[rowcount];
3765       for (k=0; k<kmax; k++) {
3766         tmp = mycols[nzcount++]/bs;
3767         if (!mask[tmp]) {
3768           mask[tmp] = 1;
3769           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3770           else masked1[dcount++] = tmp;
3771         }
3772       }
3773       rowcount++;
3774     }
3775 
3776     dlens[i]  = dcount;
3777     odlens[i] = odcount;
3778 
3779     /* zero out the mask elements we set */
3780     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3781     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3782   }
3783 
3784 
3785   if (!sizesset) {
3786     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3787   }
3788   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3789 
3790   if (!rank) {
3791     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3792     /* read in my part of the matrix numerical values  */
3793     nz = procsnz[0];
3794     vals = buf;
3795     mycols = ibuf;
3796     if (size == 1)  nz -= extra_rows;
3797     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3798     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3799 
3800     /* insert into matrix */
3801     jj      = rstart*bs;
3802     for (i=0; i<m; i++) {
3803       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3804       mycols += locrowlens[i];
3805       vals   += locrowlens[i];
3806       jj++;
3807     }
3808     /* read in other processors (except the last one) and ship out */
3809     for (i=1; i<size-1; i++) {
3810       nz   = procsnz[i];
3811       vals = buf;
3812       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3813       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3814     }
3815     /* the last proc */
3816     if (size != 1) {
3817       nz   = procsnz[i] - extra_rows;
3818       vals = buf;
3819       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3820       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3821       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3822     }
3823     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3824   } else {
3825     /* receive numeric values */
3826     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3827 
3828     /* receive message of values*/
3829     vals   = buf;
3830     mycols = ibuf;
3831     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3832 
3833     /* insert into matrix */
3834     jj      = rstart*bs;
3835     for (i=0; i<m; i++) {
3836       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3837       mycols += locrowlens[i];
3838       vals   += locrowlens[i];
3839       jj++;
3840     }
3841   }
3842   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3843   ierr = PetscFree(buf);CHKERRQ(ierr);
3844   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3845   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3846   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3847   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3848   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3849   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3850   PetscFunctionReturn(0);
3851 }
3852 
3853 #undef __FUNCT__
3854 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3855 /*@
3856    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3857 
3858    Input Parameters:
3859 +  mat  - the matrix
3860 -  fact - the factor used to compute the size of the hash table
3861 
3862    Not Collective, each process can use a different factor
3863 
3864    Level: advanced
3865 
3866   Notes:
3867    This can also be set by the command line option: -mat_use_hash_table <fact>
3868 
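   Example usage, a minimal sketch assuming A is an unassembled MATMPIBAIJ matrix created elsewhere
   (the factor value shown is purely illustrative):

.vb
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
.ve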
3869 .keywords: matrix, hashtable, factor, HT
3870 
3871 .seealso: MatSetOption()
3872 @*/
3873 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3874 {
3875   PetscErrorCode ierr;
3876 
3877   PetscFunctionBegin;
3878   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3879   PetscFunctionReturn(0);
3880 }
3881 
3882 EXTERN_C_BEGIN
3883 #undef __FUNCT__
3884 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3885 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3886 {
3887   Mat_MPIBAIJ *baij;
3888 
3889   PetscFunctionBegin;
3890   baij = (Mat_MPIBAIJ*)mat->data;
3891   baij->ht_fact = fact;
3892   PetscFunctionReturn(0);
3893 }
3894 EXTERN_C_END
3895 
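/*
   MatMPIBAIJGetSeqBAIJ - returns the diagonal (Ad) and off-diagonal (Ao) sequential BAIJ blocks of a
   MATMPIBAIJ matrix, together with the mapping (colmap) from the compressed local block columns of Ao
   to global block columns.
*/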
3896 #undef __FUNCT__
3897 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3898 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3899 {
3900   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3901 
3902   PetscFunctionBegin;
3903   *Ad     = a->A;
3904   *Ao     = a->B;
3905   *colmap = a->garray;
3906   PetscFunctionReturn(0);
3907 }
3908 
3909 /*
3910     Special version for direct calls from Fortran (to eliminate two function call overheads)
3911 */
3912 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3913 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3914 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3915 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3916 #endif
3917 
3918 #undef __FUNCT__
3919 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3920 /*@C
3921   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3922 
3923   Collective on Mat
3924 
3925   Input Parameters:
3926 + mat - the matrix
3927 . min - number of input rows
3928 . im - input rows
3929 . nin - number of input columns
3930 . in - input columns
3931 . v - numerical values input
3932 - addvin - INSERT_VALUES or ADD_VALUES
3933 
3934   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3935 
3936   Level: advanced
3937 
3938 .seealso:   MatSetValuesBlocked()
3939 @*/
3940 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3941 {
3942   /* convert input arguments to C version */
3943   Mat             mat = *matin;
3944   PetscInt        m = *min, n = *nin;
3945   InsertMode      addv = *addvin;
3946 
3947   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3948   const MatScalar *value;
3949   MatScalar       *barray=baij->barray;
3950   PetscBool       roworiented = baij->roworiented;
3951   PetscErrorCode  ierr;
3952   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3953   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3954   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3955 
3956   PetscFunctionBegin;
3957   /* tasks normally handled by MatSetValuesBlocked() */
3958   if (mat->insertmode == NOT_SET_VALUES) {
3959     mat->insertmode = addv;
3960   }
3961 #if defined(PETSC_USE_DEBUG)
3962   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3963   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3964 #endif
3965   if (mat->assembled) {
3966     mat->was_assembled = PETSC_TRUE;
3967     mat->assembled     = PETSC_FALSE;
3968   }
3969   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3970 
3971 
3972   if (!barray) {
3973     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3974     baij->barray = barray;
3975   }
3976 
3977   if (roworiented) {
3978     stepval = (n-1)*bs;
3979   } else {
3980     stepval = (m-1)*bs;
3981   }
3982   for (i=0; i<m; i++) {
3983     if (im[i] < 0) continue;
3984 #if defined(PETSC_USE_DEBUG)
3985     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3986 #endif
3987     if (im[i] >= rstart && im[i] < rend) {
3988       row = im[i] - rstart;
3989       for (j=0; j<n; j++) {
3990         /* If only a single block is passed (n == 1 row-oriented, or m == 1 column-oriented), the input block can be used in place and no copy is required */
3991         if ((roworiented) && (n == 1)) {
3992           barray = (MatScalar*)v + i*bs2;
3993         } else if ((!roworiented) && (m == 1)) {
3994           barray = (MatScalar*)v + j*bs2;
3995         } else { /* Here a copy is required */
3996           if (roworiented) {
3997             value = v + i*(stepval+bs)*bs + j*bs;
3998           } else {
3999             value = v + j*(stepval+bs)*bs + i*bs;
4000           }
4001           for (ii=0; ii<bs; ii++,value+=stepval) {
4002             for (jj=0; jj<bs; jj++) {
4003               *barray++  = *value++;
4004             }
4005           }
4006           barray -=bs2;
4007         }
4008 
4009         if (in[j] >= cstart && in[j] < cend) {
4010           col  = in[j] - cstart;
4011           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4012         }
4013         else if (in[j] < 0) continue;
4014 #if defined(PETSC_USE_DEBUG)
4015         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4016 #endif
4017         else {
4018           if (mat->was_assembled) {
4019             if (!baij->colmap) {
4020               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4021             }
4022 
4023 #if defined(PETSC_USE_DEBUG)
4024 #if defined (PETSC_USE_CTABLE)
4025             { PetscInt data;
4026               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4027               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4028             }
4029 #else
4030             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4031 #endif
4032 #endif
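            /* translate the global block column in[j] into a local block column of the
               off-diagonal part B: colmap stores 1-based scalar column offsets (0 when
               the block column is not yet present), hence the -1 and the division by bs */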
4033 #if defined (PETSC_USE_CTABLE)
4034             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4035             col  = (col - 1)/bs;
4036 #else
4037             col = (baij->colmap[in[j]] - 1)/bs;
4038 #endif
4039             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4040               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4041               col =  in[j];
4042             }
4043           }
4044           else col = in[j];
4045           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4046         }
4047       }
4048     } else {
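      /* the row is owned by another process: stash the block values so they can be
         communicated to the owning process during MatAssemblyBegin()/MatAssemblyEnd() */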
4049       if (!baij->donotstash) {
4050         if (roworiented) {
4051           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4052         } else {
4053           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4054         }
4055       }
4056     }
4057   }
4058 
4059   /* task normally handled by MatSetValuesBlocked() */
4060   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4061   PetscFunctionReturn(0);
4062 }
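/*
   Illustrative sketch (not part of the original source): the routine above follows the
   Fortran calling convention, so every argument is passed by address.  An equivalent
   invocation from C, inserting a single bs x bs block at global block row 0 and global
   block column 0, would look like the following, where mat and values are hypothetical
   (values pointing to bs*bs MatScalar entries in row-major order, since roworiented
   defaults to PETSC_TRUE):

     PetscInt   one = 1, row = 0, col = 0;
     InsertMode addv = INSERT_VALUES;
     matmpibaijsetvaluesblocked_(&mat,&one,&row,&one,&col,values,&addv);
*/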
4063 
4064 #undef __FUNCT__
4065 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4066 /*@
4067      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4068          in standard CSR format.
4069 
4070    Collective on MPI_Comm
4071 
4072    Input Parameters:
4073 +  comm - MPI communicator
4074 .  bs - the block size, only a block size of 1 is supported
4075 .  m - number of local rows (Cannot be PETSC_DECIDE)
4076 .  n - number of local columns; this should be the same as the local size used in creating
4077        the x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4078        calculated if N is given). For square matrices n is almost always m.
4079 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4080 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4081 .   i - row indices
4082 .   j - column indices
4083 -   a - matrix values
4084 
4085    Output Parameter:
4086 .   mat - the matrix
4087 
4088    Level: intermediate
4089 
4090    Notes:
4091        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4092      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4093      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4094 
4095        The i and j indices are 0 based; each entry of i is an offset into the local j (and a) arrays marking where that row starts.
4096 
4097 .keywords: matrix, baij, compressed row, sparse, parallel
4098 
4099 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4100           MATMPIBAIJ, MatCreateBAIJ(), MatCreateMPIAIJWithSplitArrays()
4101 @*/
4102 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4103 {
4104   PetscErrorCode ierr;
4105 
4106   PetscFunctionBegin;
4107   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4108   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4109   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4110   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4111   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4112   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4113   PetscFunctionReturn(0);
4114 }
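/*
   Illustrative sketch (not part of the original source): creating a matrix from local
   CSR arrays with bs = 1 on a single process.  The index and value arrays below are
   hypothetical and describe two local rows, each with one diagonal entry; with more
   than one process the column indices would be the global ones for that rank's rows.

     Mat            A;
     PetscInt       i[] = {0,1,2};          row offsets into j[] and a[]
     PetscInt       j[] = {0,1};            global column indices
     PetscScalar    a[] = {1.0,2.0};
     PetscErrorCode ierr;

     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
*/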
4115