xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 4e6ef68f07cf4e7ffee1b93dbc438d90283d533b)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 #include <petscsf.h>
5 
6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
18 {
19   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       i,*idxb = 0;
22   PetscScalar    *va,*vb;
23   Vec            vtmp;
24 
25   PetscFunctionBegin;
26   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
27   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
28   if (idx) {
29     for (i=0; i<A->rmap->n; i++) {
30       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
31     }
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++) {
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
41       va[i] = vb[i];
42       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
43     }
44   }
45 
46   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
47   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
48   ierr = PetscFree(idxb);CHKERRQ(ierr);
49   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
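/*
   A minimal usage sketch for the routine above (error checking omitted;
   assumes MatCreateVecs() is available and A is an assembled MATMPIBAIJ
   matrix).  Afterwards v[i] holds max_j |A(i,j)| for each local row i and
   idx[i] the global column where that maximum occurs:

     Vec      v;
     PetscInt *idx,m;
     MatGetLocalSize(A,&m,NULL);
     MatCreateVecs(A,NULL,&v);   (left vector, laid out like the rows of A)
     PetscMalloc1(m,&idx);
     MatGetRowMaxAbs(A,v,idx);
*/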
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non-scalable way since the
83    length of colmap equals the number of global block columns.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined(PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++) {
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
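/*
   Example of the colmap convention established above (non-CTABLE case): if
   global block column g is stored locally as the k-th block column of the
   off-diagonal part B, then colmap[g] = k*bs + 1 (the +1 lets 0 mean "not
   present").  A later lookup of a global point column c is therefore

     col = baij->colmap[c/bs] - 1;   (first local point column of the block, or negative if absent)
     if (col >= 0) col += c % bs;    (offset within the bs x bs block)

   as done in MatSetValues_MPIBAIJ() below.
*/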
108 
109 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110   { \
111  \
112     brow = row/bs;  \
113     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114     rmax = aimax[brow]; nrow = ailen[brow]; \
115     bcol = col/bs; \
116     ridx = row % bs; cidx = col % bs; \
117     low  = 0; high = nrow; \
118     while (high-low > 3) { \
119       t = (low+high)/2; \
120       if (rp[t] > bcol) high = t; \
121       else              low  = t; \
122     } \
123     for (_i=low; _i<high; _i++) { \
124       if (rp[_i] > bcol) break; \
125       if (rp[_i] == bcol) { \
126         bap = ap +  bs2*_i + bs*cidx + ridx; \
127         if (addv == ADD_VALUES) *bap += value;  \
128         else                    *bap  = value;  \
129         goto a_noinsert; \
130       } \
131     } \
132     if (a->nonew == 1) goto a_noinsert; \
133     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135     N = nrow++ - 1;  \
136     /* shift up all the later entries in this row */ \
137     for (ii=N; ii>=_i; ii--) { \
138       rp[ii+1] = rp[ii]; \
139       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
140     } \
141     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
142     rp[_i]                      = bcol;  \
143     ap[bs2*_i + bs*cidx + ridx] = value;  \
144 a_noinsert:; \
145     ailen[brow] = nrow; \
146   }
147 
148 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149   { \
150     brow = row/bs;  \
151     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152     rmax = bimax[brow]; nrow = bilen[brow]; \
153     bcol = col/bs; \
154     ridx = row % bs; cidx = col % bs; \
155     low  = 0; high = nrow; \
156     while (high-low > 3) { \
157       t = (low+high)/2; \
158       if (rp[t] > bcol) high = t; \
159       else              low  = t; \
160     } \
161     for (_i=low; _i<high; _i++) { \
162       if (rp[_i] > bcol) break; \
163       if (rp[_i] == bcol) { \
164         bap = ap +  bs2*_i + bs*cidx + ridx; \
165         if (addv == ADD_VALUES) *bap += value;  \
166         else                    *bap  = value;  \
167         goto b_noinsert; \
168       } \
169     } \
170     if (b->nonew == 1) goto b_noinsert; \
171     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173     N = nrow++ - 1;  \
174     /* shift up all the later entries in this row */ \
175     for (ii=N; ii>=_i; ii--) { \
176       rp[ii+1] = rp[ii]; \
177       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
178     } \
179     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
180     rp[_i]                      = bcol;  \
181     ap[bs2*_i + bs*cidx + ridx] = value;  \
182 b_noinsert:; \
183     bilen[brow] = nrow; \
184   }
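/*
   Both macros above locate block column bcol within block row brow by a
   truncated binary search (narrowing [low,high) until it holds at most
   three entries) followed by a linear scan.  Blocks are stored column-major,
   so entry (ridx,cidx) of block _i lives at ap[bs2*_i + bs*cidx + ridx].
   If the block is absent and new nonzeros are allowed, the later blocks in
   the row are shifted up to make room for it.
*/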
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIBAIJ"
188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
191   MatScalar      value;
192   PetscBool      roworiented = baij->roworiented;
193   PetscErrorCode ierr;
194   PetscInt       i,j,row,col;
195   PetscInt       rstart_orig=mat->rmap->rstart;
196   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
198 
199   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
200   Mat         A     = baij->A;
201   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
202   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203   MatScalar   *aa   =a->a;
204 
205   Mat         B     = baij->B;
206   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
207   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208   MatScalar   *ba   =b->a;
209 
210   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
211   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
212   MatScalar *ap,*bap;
213 
214   PetscFunctionBegin;
215   if (v) PetscValidScalarPointer(v,6);
216   for (i=0; i<m; i++) {
217     if (im[i] < 0) continue;
218 #if defined(PETSC_USE_DEBUG)
219     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
220 #endif
221     if (im[i] >= rstart_orig && im[i] < rend_orig) {
222       row = im[i] - rstart_orig;
223       for (j=0; j<n; j++) {
224         if (in[j] >= cstart_orig && in[j] < cend_orig) {
225           col = in[j] - cstart_orig;
226           if (roworiented) value = v[i*n+j];
227           else             value = v[i+j*m];
228           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         } else if (in[j] < 0) continue;
231 #if defined(PETSC_USE_DEBUG)
232         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!baij->colmap) {
237               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238             }
239 #if defined(PETSC_USE_CTABLE)
240             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
241             col  = col - 1;
242 #else
243             col = baij->colmap[in[j]/bs] - 1;
244 #endif
245             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
246               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
247               col  =  in[j];
248               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
249               B    = baij->B;
250               b    = (Mat_SeqBAIJ*)(B)->data;
251               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
252               ba   =b->a;
253             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
254             else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j];
257           else             value = v[i+j*m];
258           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
259           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
260         }
261       }
262     } else {
263       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
264       if (!baij->donotstash) {
265         mat->assembled = PETSC_FALSE;
266         if (roworiented) {
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
268         } else {
269           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
270         }
271       }
272     }
273   }
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
279 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
280 {
281   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
282   const PetscScalar *value;
283   MatScalar         *barray     = baij->barray;
284   PetscBool         roworiented = baij->roworiented;
285   PetscErrorCode    ierr;
286   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
287   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
288   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
289 
290   PetscFunctionBegin;
291   if (!barray) {
292     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
293     baij->barray = barray;
294   }
295 
296   if (roworiented) stepval = (n-1)*bs;
297   else stepval = (m-1)*bs;
298 
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, the block in v can be used in place and no copy is required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if ((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + (i*(stepval+bs) + j)*bs;
315           } else {
316             value = v + (j*(stepval+bs) + i)*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
320             barray += bs;
321           }
322           barray -= bs2;
323         }
324 
325         if (in[j] >= cstart && in[j] < cend) {
326           col  = in[j] - cstart;
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         } else if (in[j] < 0) continue;
329 #if defined(PETSC_USE_DEBUG)
330         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331 #endif
332         else {
333           if (mat->was_assembled) {
334             if (!baij->colmap) {
335               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
336             }
337 
338 #if defined(PETSC_USE_DEBUG)
339 #if defined(PETSC_USE_CTABLE)
340             { PetscInt data;
341               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
342               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343             }
344 #else
345             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346 #endif
347 #endif
348 #if defined(PETSC_USE_CTABLE)
349             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
350             col  = (col - 1)/bs;
351 #else
352             col = (baij->colmap[in[j]] - 1)/bs;
353 #endif
354             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
355               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
356               col  =  in[j];
357             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
358           } else col = in[j];
359           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
360         }
361       }
362     } else {
363       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
364       if (!baij->donotstash) {
365         if (roworiented) {
366           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
367         } else {
368           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         }
370       }
371     }
372   }
373   PetscFunctionReturn(0);
374 }
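/*
   A minimal sketch of blocked insertion driving the routine above (error
   checking omitted; ib and jb are hypothetical global block indices and
   vals holds bs*bs values, row-oriented by default):

     MatSetValuesBlocked(mat,1,&ib,1,&jb,vals,INSERT_VALUES);
*/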
375 
376 #define HASH_KEY 0.6180339887
377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
378 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
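/*
   HASH() is multiplicative hashing with the fractional part of the golden
   ratio: tmp = key*0.6180339887, the integer part is discarded, and the
   fractional part is scaled onto [0,size).  For example, with size = 11 and
   key = 7: tmp = 4.3262..., fractional part 0.3262..., so the slot is
   (PetscInt)(11*0.3262...) = 3.
*/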
380 #undef __FUNCT__
381 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
382 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
383 {
384   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
385   PetscBool      roworiented = baij->roworiented;
386   PetscErrorCode ierr;
387   PetscInt       i,j,row,col;
388   PetscInt       rstart_orig=mat->rmap->rstart;
389   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
390   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
391   PetscReal      tmp;
392   MatScalar      **HD = baij->hd,value;
393 #if defined(PETSC_USE_DEBUG)
394   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
395 #endif
396 
397   PetscFunctionBegin;
398   if (v) PetscValidScalarPointer(v,6);
399   for (i=0; i<m; i++) {
400 #if defined(PETSC_USE_DEBUG)
401     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
402     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
403 #endif
404     row = im[i];
405     if (row >= rstart_orig && row < rend_orig) {
406       for (j=0; j<n; j++) {
407         col = in[j];
408         if (roworiented) value = v[i*n+j];
409         else             value = v[i+j*m];
410         /* Look up the entry in the hash table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
423             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
424           }
425         }
426 #else
427         if (HT[idx] != key) {
428           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
429           if (idx == size) {
430             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
431             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
432           }
433         }
434 #endif
435         /* A HASH table entry is found, so insert the values at the correct address */
436         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
437         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
438       }
439     } else if (!baij->donotstash) {
440       if (roworiented) {
441         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
442       } else {
443         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
444       }
445     }
446   }
447 #if defined(PETSC_USE_DEBUG)
448   baij->ht_total_ct  = total_ct;
449   baij->ht_insert_ct = insert_ct;
450 #endif
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
456 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
457 {
458   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
459   PetscBool         roworiented = baij->roworiented;
460   PetscErrorCode    ierr;
461   PetscInt          i,j,ii,jj,row,col;
462   PetscInt          rstart=baij->rstartbs;
463   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
464   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
465   PetscReal         tmp;
466   MatScalar         **HD = baij->hd,*baij_a;
467   const PetscScalar *v_t,*value;
468 #if defined(PETSC_USE_DEBUG)
469   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
470 #endif
471 
472   PetscFunctionBegin;
473   if (roworiented) stepval = (n-1)*bs;
474   else stepval = (m-1)*bs;
475 
476   for (i=0; i<m; i++) {
477 #if defined(PETSC_USE_DEBUG)
478     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
479     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
480 #endif
481     row = im[i];
482     v_t = v + i*nbs2;
483     if (row >= rstart && row < rend) {
484       for (j=0; j<n; j++) {
485         col = in[j];
486 
487         /* Look up the entry in the hash table */
488         key = row*Nbs+col+1;
489         h1  = HASH(size,key,tmp);
490 
491         idx = h1;
492 #if defined(PETSC_USE_DEBUG)
493         total_ct++;
494         insert_ct++;
495         if (HT[idx] != key) {
496           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
497           if (idx == size) {
498             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
499             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
500           }
501         }
502 #else
503         if (HT[idx] != key) {
504           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
505           if (idx == size) {
506             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
507             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
508           }
509         }
510 #endif
511         baij_a = HD[idx];
512         if (roworiented) {
513           /*value = v + i*(stepval+bs)*bs + j*bs;*/
514           /* value = v + (i*(stepval+bs)+j)*bs; */
515           value = v_t;
516           v_t  += bs;
517           if (addv == ADD_VALUES) {
518             for (ii=0; ii<bs; ii++,value+=stepval) {
519               for (jj=ii; jj<bs2; jj+=bs) {
520                 baij_a[jj] += *value++;
521               }
522             }
523           } else {
524             for (ii=0; ii<bs; ii++,value+=stepval) {
525               for (jj=ii; jj<bs2; jj+=bs) {
526                 baij_a[jj] = *value++;
527               }
528             }
529           }
530         } else {
531           value = v + j*(stepval+bs)*bs + i*bs;
532           if (addv == ADD_VALUES) {
533             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
534               for (jj=0; jj<bs; jj++) {
535                 baij_a[jj] += *value++;
536               }
537             }
538           } else {
539             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
540               for (jj=0; jj<bs; jj++) {
541                 baij_a[jj] = *value++;
542               }
543             }
544           }
545         }
546       }
547     } else {
548       if (!baij->donotstash) {
549         if (roworiented) {
550           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557 #if defined(PETSC_USE_DEBUG)
558   baij->ht_total_ct  = total_ct;
559   baij->ht_insert_ct = insert_ct;
560 #endif
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatGetValues_MPIBAIJ"
566 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
567 {
568   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
569   PetscErrorCode ierr;
570   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
571   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
572 
573   PetscFunctionBegin;
574   for (i=0; i<m; i++) {
575     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
576     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
577     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
578       row = idxm[i] - bsrstart;
579       for (j=0; j<n; j++) {
580         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
581         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
582         if (idxn[j] >= bscstart && idxn[j] < bscend) {
583           col  = idxn[j] - bscstart;
584           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
585         } else {
586           if (!baij->colmap) {
587             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
588           }
589 #if defined(PETSC_USE_CTABLE)
590           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
591           data--;
592 #else
593           data = baij->colmap[idxn[j]/bs]-1;
594 #endif
595           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
596           else {
597             col  = data + idxn[j]%bs;
598             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599           }
600         }
601       }
602     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatNorm_MPIBAIJ"
609 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
610 {
611   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
612   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
613   PetscErrorCode ierr;
614   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
615   PetscReal      sum = 0.0;
616   MatScalar      *v;
617 
618   PetscFunctionBegin;
619   if (baij->size == 1) {
620     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
621   } else {
622     if (type == NORM_FROBENIUS) {
623       v  = amat->a;
624       nz = amat->nz*bs2;
625       for (i=0; i<nz; i++) {
626         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
627       }
628       v  = bmat->a;
629       nz = bmat->nz*bs2;
630       for (i=0; i<nz; i++) {
631         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
632       }
633       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
634       *nrm = PetscSqrtReal(*nrm);
635     } else if (type == NORM_1) { /* max column sum */
636       PetscReal *tmp,*tmp2;
637       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
638       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
639       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
640       v    = amat->a; jj = amat->j;
641       for (i=0; i<amat->nz; i++) {
642         for (j=0; j<bs; j++) {
643           col = bs*(cstart + *jj) + j; /* column index */
644           for (row=0; row<bs; row++) {
645             tmp[col] += PetscAbsScalar(*v);  v++;
646           }
647         }
648         jj++;
649       }
650       v = bmat->a; jj = bmat->j;
651       for (i=0; i<bmat->nz; i++) {
652         for (j=0; j<bs; j++) {
653           col = bs*garray[*jj] + j;
654           for (row=0; row<bs; row++) {
655             tmp[col] += PetscAbsScalar(*v); v++;
656           }
657         }
658         jj++;
659       }
660       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
661       *nrm = 0.0;
662       for (j=0; j<mat->cmap->N; j++) {
663         if (tmp2[j] > *nrm) *nrm = tmp2[j];
664       }
665       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
666     } else if (type == NORM_INFINITY) { /* max row sum */
667       PetscReal *sums;
668       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
669       sum  = 0.0;
670       for (j=0; j<amat->mbs; j++) {
671         for (row=0; row<bs; row++) sums[row] = 0.0;
672         v  = amat->a + bs2*amat->i[j];
673         nz = amat->i[j+1]-amat->i[j];
674         for (i=0; i<nz; i++) {
675           for (col=0; col<bs; col++) {
676             for (row=0; row<bs; row++) {
677               sums[row] += PetscAbsScalar(*v); v++;
678             }
679           }
680         }
681         v  = bmat->a + bs2*bmat->i[j];
682         nz = bmat->i[j+1]-bmat->i[j];
683         for (i=0; i<nz; i++) {
684           for (col=0; col<bs; col++) {
685             for (row=0; row<bs; row++) {
686               sums[row] += PetscAbsScalar(*v); v++;
687             }
688           }
689         }
690         for (row=0; row<bs; row++) {
691           if (sums[row] > sum) sum = sums[row];
692         }
693       }
694       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
695       ierr = PetscFree(sums);CHKERRQ(ierr);
696     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
697   }
698   PetscFunctionReturn(0);
699 }
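/*
   The norms computed above, with the local rows split into the diagonal
   part A and off-diagonal part B:

     NORM_FROBENIUS: sqrt(sum_ij |m_ij|^2); local sums of squares are
                     reduced with MPIU_SUM before the square root.
     NORM_1:         max_j sum_i |m_ij|; per-column absolute sums are
                     reduced with MPIU_SUM, then the maximum is taken.
     NORM_INFINITY:  max_i sum_j |m_ij|; each row is entirely local, so
                     local row-sum maxima are reduced with MPIU_MAX.
*/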
700 
701 /*
702   Creates and populates the hash table.
703   This table is created only once.
704   If new entries need to be added to the matrix
705   then the hash table has to be destroyed and
706   recreated.
707 */
708 #undef __FUNCT__
709 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
710 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
711 {
712   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
713   Mat            A     = baij->A,B=baij->B;
714   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
715   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
716   PetscErrorCode ierr;
717   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
718   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
719   PetscInt       *HT,key;
720   MatScalar      **HD;
721   PetscReal      tmp;
722 #if defined(PETSC_USE_INFO)
723   PetscInt ct=0,max=0;
724 #endif
725 
726   PetscFunctionBegin;
727   if (baij->ht) PetscFunctionReturn(0);
728 
729   baij->ht_size = (PetscInt)(factor*nz);
730   ht_size       = baij->ht_size;
731 
732   /* Allocate Memory for Hash Table */
733   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
734   HD   = baij->hd;
735   HT   = baij->ht;
736 
737   /* Loop Over A */
738   for (i=0; i<a->mbs; i++) {
739     for (j=ai[i]; j<ai[i+1]; j++) {
740       row = i+rstart;
741       col = aj[j]+cstart;
742 
743       key = row*Nbs + col + 1;
744       h1  = HASH(ht_size,key,tmp);
745       for (k=0; k<ht_size; k++) {
746         if (!HT[(h1+k)%ht_size]) {
747           HT[(h1+k)%ht_size] = key;
748           HD[(h1+k)%ht_size] = a->a + j*bs2;
749           break;
750 #if defined(PETSC_USE_INFO)
751         } else {
752           ct++;
753 #endif
754         }
755       }
756 #if defined(PETSC_USE_INFO)
757       if (k > max) max = k;
758 #endif
759     }
760   }
761   /* Loop Over B */
762   for (i=0; i<b->mbs; i++) {
763     for (j=bi[i]; j<bi[i+1]; j++) {
764       row = i+rstart;
765       col = garray[bj[j]];
766       key = row*Nbs + col + 1;
767       h1  = HASH(ht_size,key,tmp);
768       for (k=0; k<ht_size; k++) {
769         if (!HT[(h1+k)%ht_size]) {
770           HT[(h1+k)%ht_size] = key;
771           HD[(h1+k)%ht_size] = b->a + j*bs2;
772           break;
773 #if defined(PETSC_USE_INFO)
774         } else {
775           ct++;
776 #endif
777         }
778       }
779 #if defined(PETSC_USE_INFO)
780       if (k > max) max = k;
781 #endif
782     }
783   }
784 
785   /* Print Summary */
786 #if defined(PETSC_USE_INFO)
787   for (i=0,j=0; i<ht_size; i++) {
788     if (HT[i]) j++;
789   }
790   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
791 #endif
792   PetscFunctionReturn(0);
793 }
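/*
   Collisions are resolved above by linear probing: starting from
   h1 = HASH(ht_size,key,tmp), slots (h1+k)%ht_size are scanned until an
   empty one is found.  Each slot stores key = row*Nbs + col + 1 (the +1
   keeps 0 meaning "empty") together with a pointer to the bs2 values of
   that block, so the _HT variants of MatSetValues() can write directly
   into the matrix storage without searching the row.
*/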
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
797 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802   InsertMode     addv;
803 
804   PetscFunctionBegin;
805   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
806 
807   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
808   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
809   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
810   mat->insertmode = addv; /* in case this processor had no cache */
811 
812   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
813   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
815   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
816   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
817   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
818   PetscFunctionReturn(0);
819 }
820 
821 #undef __FUNCT__
822 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
823 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
824 {
825   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
826   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
827   PetscErrorCode ierr;
828   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
829   PetscInt       *row,*col;
830   PetscBool      r1,r2,r3,other_disassembled;
831   MatScalar      *val;
832   InsertMode     addv = mat->insertmode;
833   PetscMPIInt    n;
834 
835   PetscFunctionBegin;
836   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
837   if (!baij->donotstash && !mat->nooffprocentries) {
838     while (1) {
839       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
840       if (!flg) break;
841 
842       for (i=0; i<n;) {
843         /* Now identify the consecutive vals belonging to the same row */
844         for (j=i,rstart=row[j]; j<n; j++) {
845           if (row[j] != rstart) break;
846         }
847         if (j < n) ncols = j-i;
848         else       ncols = n-i;
849         /* Now assemble all these values with a single function call */
850         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
851         i    = j;
852       }
853     }
854     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
855     /* Now process the block-stash. Since the values are stashed column-oriented,
856        clear the roworiented flags before inserting, and restore the original
857        flags after the insertions */
858     r1 = baij->roworiented;
859     r2 = a->roworiented;
860     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
861 
862     baij->roworiented = PETSC_FALSE;
863     a->roworiented    = PETSC_FALSE;
864 
865     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
866     while (1) {
867       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
868       if (!flg) break;
869 
870       for (i=0; i<n;) {
871         /* Now identify the consecutive vals belonging to the same row */
872         for (j=i,rstart=row[j]; j<n; j++) {
873           if (row[j] != rstart) break;
874         }
875         if (j < n) ncols = j-i;
876         else       ncols = n-i;
877         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
878         i    = j;
879       }
880     }
881     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
882 
883     baij->roworiented = r1;
884     a->roworiented    = r2;
885 
886     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
887   }
888 
889   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
890   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
891 
892   /* determine if any processor has disassembled; if so, we must
893      also disassemble ourselves, in order that we may reassemble. */
894   /*
895      if the nonzero structure of submatrix B cannot change then we know that
896      no processor disassembled, thus we can skip this step
897   */
898   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
899     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
900     if (mat->was_assembled && !other_disassembled) {
901       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
902     }
903   }
904 
905   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
906     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
907   }
908   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
909   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
910 
911 #if defined(PETSC_USE_INFO)
912   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
913     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
914 
915     baij->ht_total_ct  = 0;
916     baij->ht_insert_ct = 0;
917   }
918 #endif
919   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
920     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
921 
922     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
923     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
924   }
925 
926   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
927 
928   baij->rowvalues = 0;
929   {
930     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
931     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
932   }
933   PetscFunctionReturn(0);
934 }
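/*
   A minimal sketch of the assembly cycle that drives the two routines
   above (error checking omitted; i, j, v are placeholders):

     MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);  (off-process entries go to the stash)
     ...
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);   (starts the stash scatters)
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);     (drains the stashes, assembles A and B)
*/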
935 
936 #include <petscdraw.h>
937 #undef __FUNCT__
938 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
939 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
940 {
941   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
942   PetscErrorCode    ierr;
943   PetscMPIInt       size = baij->size,rank = baij->rank;
944   PetscInt          bs   = mat->rmap->bs;
945   PetscBool         iascii,isdraw;
946   PetscViewer       sviewer;
947   PetscViewerFormat format;
948 
949   PetscFunctionBegin;
950   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
951   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
952   if (iascii) {
953     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
954     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
955       MatInfo info;
956       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
957       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
958       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
959       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
960                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
961       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
962       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
963       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
964       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
965       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
966       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
967       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
968       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
969       PetscFunctionReturn(0);
970     } else if (format == PETSC_VIEWER_ASCII_INFO) {
971       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
972       PetscFunctionReturn(0);
973     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
974       PetscFunctionReturn(0);
975     }
976   }
977 
978   if (isdraw) {
979     PetscDraw draw;
980     PetscBool isnull;
981     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
982     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
983   }
984 
985   if (size == 1) {
986     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
987     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
988   } else {
989     /* assemble the entire matrix onto first processor. */
990     Mat         A;
991     Mat_SeqBAIJ *Aloc;
992     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
993     MatScalar   *a;
994 
995     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
996     /* Perhaps this should be the type of mat? */
997     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
998     if (!rank) {
999       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1000     } else {
1001       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1002     }
1003     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1004     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1005     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1006     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1007 
1008     /* copy over the A part */
1009     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1010     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1011     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1012 
1013     for (i=0; i<mbs; i++) {
1014       rvals[0] = bs*(baij->rstartbs + i);
1015       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1016       for (j=ai[i]; j<ai[i+1]; j++) {
1017         col = (baij->cstartbs+aj[j])*bs;
1018         for (k=0; k<bs; k++) {
1019           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1020           col++; a += bs;
1021         }
1022       }
1023     }
1024     /* copy over the B part */
1025     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1026     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1027     for (i=0; i<mbs; i++) {
1028       rvals[0] = bs*(baij->rstartbs + i);
1029       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1030       for (j=ai[i]; j<ai[i+1]; j++) {
1031         col = baij->garray[aj[j]]*bs;
1032         for (k=0; k<bs; k++) {
1033           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1034           col++; a += bs;
1035         }
1036       }
1037     }
1038     ierr = PetscFree(rvals);CHKERRQ(ierr);
1039     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041     /*
1042        Everyone has to participate in this call to draw the matrix since the graphics waits are
1043        synchronized across all processors that share the PetscDraw object
1044     */
1045     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1046     if (!rank) {
1047       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1048       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1049       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1050       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1051     }
1052     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1053     ierr = MatDestroy(&A);CHKERRQ(ierr);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1060 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1061 {
1062   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1063   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1064   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1065   PetscErrorCode ierr;
1066   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1067   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1068   int            fd;
1069   PetscScalar    *column_values;
1070   FILE           *file;
1071   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1072   PetscInt       message_count,flowcontrolcount;
1073 
1074   PetscFunctionBegin;
1075   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1076   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1077   nz   = bs2*(A->nz + B->nz);
1078   rlen = mat->rmap->n;
1079   if (!rank) {
1080     header[0] = MAT_FILE_CLASSID;
1081     header[1] = mat->rmap->N;
1082     header[2] = mat->cmap->N;
1083 
1084     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1085     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1086     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1087     /* get largest number of rows any processor has */
1088     range = mat->rmap->range;
1089     for (i=1; i<size; i++) {
1090       rlen = PetscMax(rlen,range[i+1] - range[i]);
1091     }
1092   } else {
1093     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1094   }
1095 
1096   ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
1097   /* compute lengths of each row  */
1098   for (i=0; i<a->mbs; i++) {
1099     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1100   }
1101   /* store the row lengths to the file */
1102   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1103   if (!rank) {
1104     MPI_Status status;
1105     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1106     rlen = (range[1] - range[0])/bs;
1107     for (i=0; i<rlen; i++) {
1108       for (j=0; j<bs; j++) {
1109         row_lens[i*bs+j] = bs*crow_lens[i];
1110       }
1111     }
1112     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1113     for (i=1; i<size; i++) {
1114       rlen = (range[i+1] - range[i])/bs;
1115       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1116       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1117       for (k=0; k<rlen; k++) {
1118         for (j=0; j<bs; j++) {
1119           row_lens[k*bs+j] = bs*crow_lens[k];
1120         }
1121       }
1122       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1123     }
1124     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1125     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1126   } else {
1127     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1128     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1129     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1130   }
1131   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1132 
1133   /* load up the local column indices. Include them for all rows, not just one per block row, since process 0 does not have the
1134      information needed to expand a block row into its individual rows. This does require more communication, but still no more than
1135      the communication needed for the nonzero values  */
1136   nzmax = nz; /* space the largest processor needs */
1137   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1138   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1139   cnt   = 0;
1140   for (i=0; i<a->mbs; i++) {
1141     pcnt = cnt;
1142     for (j=B->i[i]; j<B->i[i+1]; j++) {
1143       if ((col = garray[B->j[j]]) > cstart) break;
1144       for (l=0; l<bs; l++) {
1145         column_indices[cnt++] = bs*col+l;
1146       }
1147     }
1148     for (k=A->i[i]; k<A->i[i+1]; k++) {
1149       for (l=0; l<bs; l++) {
1150         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1151       }
1152     }
1153     for (; j<B->i[i+1]; j++) {
1154       for (l=0; l<bs; l++) {
1155         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1156       }
1157     }
1158     len = cnt - pcnt;
1159     for (k=1; k<bs; k++) {
1160       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1161       cnt += len;
1162     }
1163   }
1164   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1165 
1166   /* store the columns to the file */
1167   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1168   if (!rank) {
1169     MPI_Status status;
1170     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1171     for (i=1; i<size; i++) {
1172       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1173       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1174       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1175       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     }
1177     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1178   } else {
1179     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1180     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1181     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1182     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1183   }
1184   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1185 
1186   /* load up the numerical values */
1187   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1188   cnt  = 0;
1189   for (i=0; i<a->mbs; i++) {
1190     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1191     for (j=B->i[i]; j<B->i[i+1]; j++) {
1192       if (garray[B->j[j]] > cstart) break;
1193       for (l=0; l<bs; l++) {
1194         for (ll=0; ll<bs; ll++) {
1195           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1196         }
1197       }
1198       cnt += bs;
1199     }
1200     for (k=A->i[i]; k<A->i[i+1]; k++) {
1201       for (l=0; l<bs; l++) {
1202         for (ll=0; ll<bs; ll++) {
1203           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1204         }
1205       }
1206       cnt += bs;
1207     }
1208     for (; j<B->i[i+1]; j++) {
1209       for (l=0; l<bs; l++) {
1210         for (ll=0; ll<bs; ll++) {
1211           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1212         }
1213       }
1214       cnt += bs;
1215     }
1216     cnt += (bs-1)*rlen;
1217   }
1218   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1219 
1220   /* store the column values to the file */
1221   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1222   if (!rank) {
1223     MPI_Status status;
1224     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1225     for (i=1; i<size; i++) {
1226       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1227       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1228       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1229       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1230     }
1231     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1232   } else {
1233     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1234     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1235     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1236     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1237   }
1238   ierr = PetscFree(column_values);CHKERRQ(ierr);
1239 
1240   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1241   if (file) {
1242     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1243   }
1244   PetscFunctionReturn(0);
1245 }
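/*
   The routine above writes the standard PETSc binary matrix format: a
   header {MAT_FILE_CLASSID, M, N, total nz}, then the per-row nonzero
   counts, then all column indices, then all numerical values, with each
   section gathered to process 0 under viewer flow control.
*/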
1246 
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatView_MPIBAIJ"
1249 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1250 {
1251   PetscErrorCode ierr;
1252   PetscBool      iascii,isdraw,issocket,isbinary;
1253 
1254   PetscFunctionBegin;
1255   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1256   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1257   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1258   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1259   if (iascii || isdraw || issocket) {
1260     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1261   } else if (isbinary) {
1262     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1263   }
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1269 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1270 {
1271   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1272   PetscErrorCode ierr;
1273 
1274   PetscFunctionBegin;
1275 #if defined(PETSC_USE_LOG)
1276   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1277 #endif
1278   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1279   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1280   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1281   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1282 #if defined(PETSC_USE_CTABLE)
1283   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1284 #else
1285   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1286 #endif
1287   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1288   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1289   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1290   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1291   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1292   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1293   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1294   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1295 
1296   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatMult_MPIBAIJ"
1311 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1312 {
1313   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315   PetscInt       nt;
1316 
1317   PetscFunctionBegin;
1318   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1319   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1320   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1322   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1323   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1324   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1331 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1332 {
1333   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1334   PetscErrorCode ierr;
1335 
1336   PetscFunctionBegin;
1337   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1338   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1339   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1340   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1346 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1347 {
1348   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1349   PetscErrorCode ierr;
1350   PetscBool      merged;
1351 
1352   PetscFunctionBegin;
1353   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1354   /* do nondiagonal part */
1355   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1356   if (!merged) {
1357     /* send it on its way */
1358     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1359     /* do local part */
1360     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1361     /* receive remote parts: note this assumes the values are not actually */
1362     /* inserted in yy until the next line */
1363     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1364   } else {
1365     /* do local part */
1366     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1367     /* send it on its way */
1368     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1369     /* values actually were received in the Begin() but we need to call this nop */
1370     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1371   }
1372   PetscFunctionReturn(0);
1373 }
1374 
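/*
   The mult kernels above share one overlap pattern: begin the ghost-value
   scatter, do the purely local (diagonal-block) work while the messages are
   in flight, then end the scatter and apply the off-diagonal block.  A
   minimal caller-side sketch (illustrative only, standard PETSc calls;
   error checking omitted):

     Vec x,y;
     MatGetVecs(A,&x,&y);      the first Vec matches A's columns, the second its rows
     VecSet(x,1.0);
     MatMult(A,x,y);           dispatches to MatMult_MPIBAIJ() above
     MatMultTranspose(A,y,x);  dispatches to MatMultTranspose_MPIBAIJ() above
     VecDestroy(&x);
     VecDestroy(&y);
*/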
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1377 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1378 {
1379   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1380   PetscErrorCode ierr;
1381 
1382   PetscFunctionBegin;
1383   /* do nondiagonal part */
1384   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1385   /* send it on its way */
1386   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1387   /* do local part */
1388   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the VecScatterEnd() below, which is true for   */
  /* the current implementation but may not always be true.              */
1392   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 /*
1397   This only works correctly for square matrices where the subblock A->A is the
1398    diagonal block
1399 */
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1402 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1403 {
1404   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1405   PetscErrorCode ierr;
1406 
1407   PetscFunctionBegin;
1408   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1409   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatScale_MPIBAIJ"
1415 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1416 {
1417   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1422   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1428 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1429 {
1430   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1431   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1432   PetscErrorCode ierr;
1433   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1434   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1435   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1436 
1437   PetscFunctionBegin;
1438   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1439   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1440   mat->getrowactive = PETSC_TRUE;
1441 
1442   if (!mat->rowvalues && (idx || v)) {
1443     /*
1444         allocate enough space to hold information from the longest row.
1445     */
1446     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1447     PetscInt    max = 1,mbs = mat->mbs,tmp;
1448     for (i=0; i<mbs; i++) {
1449       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1450       if (max < tmp) max = tmp;
1451     }
1452     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1453   }
1454   lrow = row - brstart;
1455 
1456   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1457   if (!v)   {pvA = 0; pvB = 0;}
1458   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1459   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1460   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1461   nztot = nzA + nzB;
1462 
1463   cmap = mat->garray;
1464   if (v  || idx) {
1465     if (nztot) {
1466       /* Sort by increasing column numbers, assuming A and B already sorted */
1467       PetscInt imark = -1;
1468       if (v) {
1469         *v = v_p = mat->rowvalues;
1470         for (i=0; i<nzB; i++) {
1471           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1472           else break;
1473         }
1474         imark = i;
1475         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1476         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1477       }
1478       if (idx) {
1479         *idx = idx_p = mat->rowindices;
1480         if (imark > -1) {
1481           for (i=0; i<imark; i++) {
1482             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1483           }
1484         } else {
1485           for (i=0; i<nzB; i++) {
1486             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1487             else break;
1488           }
1489           imark = i;
1490         }
1491         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1493       }
1494     } else {
1495       if (idx) *idx = 0;
1496       if (v)   *v   = 0;
1497     }
1498   }
1499   *nz  = nztot;
1500   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1501   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1502   PetscFunctionReturn(0);
1503 }
1504 
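/*
   The merge in MatGetRow_MPIBAIJ() above relies on two facts: both the
   diagonal (A) and off-diagonal (B) parts store each row with ascending
   column indices, and every column of the A part lies in one contiguous
   global range.  The merged row is therefore the B entries whose global
   block column is below cstart, then all A entries, then the remaining B
   entries.  A small worked example (illustrative only): with bs = 1,
   cstart = 10, A columns {10,12} and B global columns {3,7,15}, the first
   loop sets imark = 2 and the merged row is {3,7,10,12,15}.
*/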
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1507 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1508 {
1509   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1510 
1511   PetscFunctionBegin;
1512   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1513   baij->getrowactive = PETSC_FALSE;
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1519 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1520 {
1521   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1526   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1532 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1533 {
1534   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1535   Mat            A  = a->A,B = a->B;
1536   PetscErrorCode ierr;
1537   PetscReal      isend[5],irecv[5];
1538 
1539   PetscFunctionBegin;
1540   info->block_size = (PetscReal)matin->rmap->bs;
1541 
1542   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1543 
1544   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1545   isend[3] = info->memory;  isend[4] = info->mallocs;
1546 
1547   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1548 
1549   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1550   isend[3] += info->memory;  isend[4] += info->mallocs;
1551 
1552   if (flag == MAT_LOCAL) {
1553     info->nz_used      = isend[0];
1554     info->nz_allocated = isend[1];
1555     info->nz_unneeded  = isend[2];
1556     info->memory       = isend[3];
1557     info->mallocs      = isend[4];
1558   } else if (flag == MAT_GLOBAL_MAX) {
1559     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1560 
1561     info->nz_used      = irecv[0];
1562     info->nz_allocated = irecv[1];
1563     info->nz_unneeded  = irecv[2];
1564     info->memory       = irecv[3];
1565     info->mallocs      = irecv[4];
1566   } else if (flag == MAT_GLOBAL_SUM) {
1567     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1568 
1569     info->nz_used      = irecv[0];
1570     info->nz_allocated = irecv[1];
1571     info->nz_unneeded  = irecv[2];
1572     info->memory       = irecv[3];
1573     info->mallocs      = irecv[4];
1574   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1575   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1576   info->fill_ratio_needed = 0;
1577   info->factor_mallocs    = 0;
1578   PetscFunctionReturn(0);
1579 }
1580 
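/*
   A caller-side sketch of the routine above (illustrative only): the flag
   chooses whether the statistics are returned for this process alone or
   reduced with MPI_MAX or MPI_SUM over the matrix's communicator.

     MatInfo info;
     MatGetInfo(A,MAT_GLOBAL_SUM,&info);
     PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",(double)info.nz_used);
*/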
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1583 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1584 {
1585   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1586   PetscErrorCode ierr;
1587 
1588   PetscFunctionBegin;
1589   switch (op) {
1590   case MAT_NEW_NONZERO_LOCATIONS:
1591   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1592   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1593   case MAT_KEEP_NONZERO_PATTERN:
1594   case MAT_NEW_NONZERO_LOCATION_ERR:
1595     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1596     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1597     break;
1598   case MAT_ROW_ORIENTED:
1599     a->roworiented = flg;
1600 
1601     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1602     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1603     break;
1604   case MAT_NEW_DIAGONALS:
1605     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1606     break;
1607   case MAT_IGNORE_OFF_PROC_ENTRIES:
1608     a->donotstash = flg;
1609     break;
1610   case MAT_USE_HASH_TABLE:
1611     a->ht_flag = flg;
1612     break;
1613   case MAT_SYMMETRIC:
1614   case MAT_STRUCTURALLY_SYMMETRIC:
1615   case MAT_HERMITIAN:
1616   case MAT_SYMMETRY_ETERNAL:
1617     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1618     break;
1619   default:
1620     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1621   }
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 #undef __FUNCT__
1626 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1627 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1628 {
1629   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1630   Mat_SeqBAIJ    *Aloc;
1631   Mat            B;
1632   PetscErrorCode ierr;
1633   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1634   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1635   MatScalar      *a;
1636 
1637   PetscFunctionBegin;
1638   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1639   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1640     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1641     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1642     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1643     /* Do not know preallocation information, but must set block size */
1644     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1645   } else {
1646     B = *matout;
1647   }
1648 
1649   /* copy over the A part */
1650   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1651   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1652   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1653 
1654   for (i=0; i<mbs; i++) {
1655     rvals[0] = bs*(baij->rstartbs + i);
1656     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1657     for (j=ai[i]; j<ai[i+1]; j++) {
1658       col = (baij->cstartbs+aj[j])*bs;
1659       for (k=0; k<bs; k++) {
1660         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1661 
1662         col++; a += bs;
1663       }
1664     }
1665   }
1666   /* copy over the B part */
1667   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1668   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1669   for (i=0; i<mbs; i++) {
1670     rvals[0] = bs*(baij->rstartbs + i);
1671     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1672     for (j=ai[i]; j<ai[i+1]; j++) {
1673       col = baij->garray[aj[j]]*bs;
1674       for (k=0; k<bs; k++) {
1675         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1676         col++;
1677         a += bs;
1678       }
1679     }
1680   }
1681   ierr = PetscFree(rvals);CHKERRQ(ierr);
1682   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1683   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1684 
1685   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1686   else {
1687     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1688   }
1689   PetscFunctionReturn(0);
1690 }
1691 
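/*
   How the copy loops above transpose blockwise: rvals holds the bs global
   point-row indices of block row i of A, and these become the column indices
   in B.  Each stored bs x bs block is kept in column-major order, so for
   each k the bs consecutive values at a are the k-th column of the block,
   i.e. the entries A(rvals[0..bs-1],col); inserting them as point row col of
   B yields B(col,rvals[l]) = A(rvals[l],col), which is exactly the transpose.
*/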
1692 #undef __FUNCT__
1693 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1694 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1695 {
1696   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1697   Mat            a     = baij->A,b = baij->B;
1698   PetscErrorCode ierr;
1699   PetscInt       s1,s2,s3;
1700 
1701   PetscFunctionBegin;
1702   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1703   if (rr) {
1704     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1705     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1706     /* Overlap communication with computation. */
1707     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1708   }
1709   if (ll) {
1710     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1711     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1712     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1713   }
1714   /* scale  the diagonal block */
1715   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1716 
1717   if (rr) {
1718     /* Do a scatter end and then right scale the off-diagonal block */
1719     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1720     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1721   }
1722   PetscFunctionReturn(0);
1723 }
1724 
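/*
   A usage sketch for the routine above (illustrative only): it computes
   mat <- diag(ll) * mat * diag(rr), and either vector may be NULL to skip
   that side.

     Vec l,r;
     MatGetVecs(mat,&r,&l);    right Vec matches the columns, left Vec the rows
     VecSet(l,2.0);
     VecSet(r,0.5);
     MatDiagonalScale(mat,l,r);
     VecDestroy(&l);
     VecDestroy(&r);
*/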
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1727 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1728 {
1729   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1730   PetscInt      *owners = A->rmap->range;
1731   PetscInt       n      = A->rmap->n;
1732   PetscMPIInt    size   = l->size;
1733   PetscSF        sf;
1734   PetscInt      *lrows;
1735   PetscSFNode   *rrows;
1736   PetscInt       lastidx = -1, r, p = 0, len = 0;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   /* Create SF where leaves are input rows and roots are owned rows */
1741   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1742   for (r = 0; r < n; ++r) lrows[r] = -1;
1743   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1744   for (r = 0; r < N; ++r) {
1745     const PetscInt idx   = rows[r];
1746     PetscBool      found = PETSC_FALSE;
1747     /* Trick for efficient searching for sorted rows */
1748     if (lastidx > idx) p = 0;
1749     lastidx = idx;
1750     for (; p < size; ++p) {
1751       if (idx >= owners[p] && idx < owners[p+1]) {
1752         rrows[r].rank  = p;
1753         rrows[r].index = rows[r] - owners[p];
1754         found = PETSC_TRUE;
1755         break;
1756       }
1757     }
1758     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1759   }
1760   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1761   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1762   /* Collect flags for rows to be zeroed */
1763   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1764   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1765   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1766   /* Compress and put in row numbers */
1767   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1768   /* fix right hand side if needed */
1769   if (x && b) {
1770     const PetscScalar *xx;
1771     PetscScalar       *bb;
1772 
1773     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1774     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1775     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1776     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1777     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1778   }
1779 
1780   /* actually zap the local rows */
1781   /*
1782         Zero the required rows. If the "diagonal block" of the matrix
1783      is square and the user wishes to set the diagonal we use separate
1784      code so that MatSetValues() is not called for each diagonal allocating
1785      new memory, thus calling lots of mallocs and slowing things down.
1786 
1787   */
1788   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1789   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
1790   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1791     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
1792   } else if (diag != 0.0) {
1793     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1794     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1795        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1796     for (r = 0; r < len; ++r) {
1797       const PetscInt row = lrows[r] + A->rmap->rstart;
1798       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1799     }
1800     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1801     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1802   } else {
1803     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1804   }
1805   ierr = PetscFree(lrows);CHKERRQ(ierr);
1806   {
1807     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1808     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1809   }
1810   PetscFunctionReturn(0);
1811 }
1812 
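/*
   A minimal standalone sketch (illustrative only; the names remote, leafdata
   and rootdata are placeholders) of the star-forest pattern used above: each
   requested global row is a leaf pointing at its owning (rank, local index)
   root, and a reduction marks the locally owned rows.

     PetscSF     sf;
     PetscSFNode *remote;   one entry per requested row, filled from the
                            ownership ranges as in the loop above
     PetscSFCreate(comm,&sf);
     PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);
     PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_LOR);
     PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_LOR);
     PetscSFDestroy(&sf);
*/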
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1815 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1816 {
1817   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1818   PetscErrorCode    ierr;
1819   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
1820   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1821   PetscInt          *lrows,*owners = A->rmap->range;
1822   PetscSFNode       *rrows;
1823   PetscSF           sf;
1824   const PetscScalar *xx;
1825   PetscScalar       *bb,*mask;
1826   Vec               xmask,lmask;
1827   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1828   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1829   PetscScalar       *aa;
1830 #if defined(PETSC_DEBUG)
1831   PetscBool found = PETSC_FALSE;
1832 #endif
1833 
1834   PetscFunctionBegin;
1835   /* Create SF where leaves are input rows and roots are owned rows */
1836   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1837   for (r = 0; r < n; ++r) lrows[r] = -1;
1838   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1839   for (r = 0; r < N; ++r) {
1840     const PetscInt idx   = rows[r];
1841     PetscBool      found = PETSC_FALSE;
1842     /* Trick for efficient searching for sorted rows */
1843     if (lastidx > idx) p = 0;
1844     lastidx = idx;
1845     for (; p < size; ++p) {
1846       if (idx >= owners[p] && idx < owners[p+1]) {
1847         rrows[r].rank  = p;
1848         rrows[r].index = rows[r] - owners[p];
1849         found = PETSC_TRUE;
1850         break;
1851       }
1852     }
1853     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1854   }
1855   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1856   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1857   /* Collect flags for rows to be zeroed */
1858   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1859   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1860   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1861   /* Compress and put in row numbers */
1862   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1863   /* zero diagonal part of matrix */
1864   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1865   /* handle off diagonal part of matrix */
1866   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1867   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1868   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1869   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1870   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1871   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1872   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1873   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1874   if (x) {
1875     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1876     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1877     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1878     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1879   }
1880   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1881   /* remove zeroed rows of off diagonal matrix */
1882   for (i = 0; i < len; ++i) {
1883     row   = lrows[i];
1884     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1885     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1886     for (k = 0; k < count; ++k) {
1887       aa[0] = 0.0;
1888       aa   += bs;
1889     }
1890   }
  /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1892   for (i = 0; i < l->B->rmap->N; ++i) {
1893     row = i/bs;
1894     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1895       for (k = 0; k < bs; ++k) {
1896         col = bs*baij->j[j] + k;
1897         if (PetscAbsScalar(mask[col])) {
1898           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1899           if (b) bb[i] -= aa[0]*xx[col];
1900           aa[0] = 0.0;
1901         }
1902       }
1903     }
1904   }
1905   if (x) {
1906     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1907     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1908   }
1909   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1910   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1911   ierr = PetscFree(lrows);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1917 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1918 {
1919   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1920   PetscErrorCode ierr;
1921 
1922   PetscFunctionBegin;
1923   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatEqual_MPIBAIJ"
1931 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1932 {
1933   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1934   Mat            a,b,c,d;
1935   PetscBool      flg;
1936   PetscErrorCode ierr;
1937 
1938   PetscFunctionBegin;
1939   a = matA->A; b = matA->B;
1940   c = matB->A; d = matB->B;
1941 
1942   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1943   if (flg) {
1944     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1945   }
1946   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatCopy_MPIBAIJ"
1952 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1953 {
1954   PetscErrorCode ierr;
1955   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1956   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1957 
1958   PetscFunctionBegin;
1959   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1960   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1961     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1962   } else {
1963     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1964     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1965   }
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 #undef __FUNCT__
1970 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1971 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1972 {
1973   PetscErrorCode ierr;
1974 
1975   PetscFunctionBegin;
1976   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1982 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1983 {
1984   PetscErrorCode ierr;
1985   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1986   PetscBLASInt   bnz,one=1;
1987   Mat_SeqBAIJ    *x,*y;
1988 
1989   PetscFunctionBegin;
1990   if (str == SAME_NONZERO_PATTERN) {
1991     PetscScalar alpha = a;
1992     x    = (Mat_SeqBAIJ*)xx->A->data;
1993     y    = (Mat_SeqBAIJ*)yy->A->data;
1994     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1995     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1996     x    = (Mat_SeqBAIJ*)xx->B->data;
1997     y    = (Mat_SeqBAIJ*)yy->B->data;
1998     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1999     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2000   } else {
2001     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2002   }
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
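/*
   With SAME_NONZERO_PATTERN the two matrices share an identical ij structure,
   so Y += a*X above reduces to one BLAS axpy per stored value array.  A
   caller-side sketch (illustrative only):

     MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);        takes the fast path above
     MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);   falls back to MatAXPY_Basic()
*/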
2007 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2008 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2009 {
2010   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2011   PetscErrorCode ierr;
2012 
2013   PetscFunctionBegin;
2014   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2015   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2021 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2022 {
2023   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2024   PetscErrorCode ierr;
2025 
2026   PetscFunctionBegin;
2027   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2028   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 #undef __FUNCT__
2033 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2034 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2035 {
2036   PetscErrorCode ierr;
2037   IS             iscol_local;
2038   PetscInt       csize;
2039 
2040   PetscFunctionBegin;
2041   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2042   if (call == MAT_REUSE_MATRIX) {
2043     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2044     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2045   } else {
2046     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2047   }
2048   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2049   if (call == MAT_INITIAL_MATRIX) {
2050     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2051     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2052   }
2053   PetscFunctionReturn(0);
2054 }
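/*
   The IS composed on the new matrix under the key "ISAllGather" is what a
   later MAT_REUSE_MATRIX call queries back, so the gathered column IS need
   not be recomputed.  A reuse sketch (illustrative only):

     Mat sub = NULL;
     MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);
     ... change the values (not the pattern) of A ...
     MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);
     MatDestroy(&sub);
*/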
2055 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2058 /*
2059   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2060   in local and then by concatenating the local matrices the end result.
2061   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2062 */
2063 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2064 {
2065   PetscErrorCode ierr;
2066   PetscMPIInt    rank,size;
2067   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2068   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2069   Mat            M,Mreuse;
2070   MatScalar      *vwork,*aa;
2071   MPI_Comm       comm;
2072   IS             isrow_new, iscol_new;
2073   PetscBool      idflag,allrows, allcols;
2074   Mat_SeqBAIJ    *aij;
2075 
2076   PetscFunctionBegin;
2077   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2078   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2079   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided. It does not report
     errors and might change the indices, hence it is buggy */
2082   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2083   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2084 
2085   /* Check for special case: each processor gets entire matrix columns */
2086   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2087   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2088   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2089   else allcols = PETSC_FALSE;
2090 
2091   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2092   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2093   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2094   else allrows = PETSC_FALSE;
2095 
2096   if (call ==  MAT_REUSE_MATRIX) {
2097     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2098     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2099     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2100   } else {
2101     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2102   }
2103   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2104   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2105   /*
2106       m - number of local rows
2107       n - number of columns (same on all processors)
2108       rstart - first row in new global matrix generated
2109   */
2110   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2111   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2112   m    = m/bs;
2113   n    = n/bs;
2114 
2115   if (call == MAT_INITIAL_MATRIX) {
2116     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2117     ii  = aij->i;
2118     jj  = aij->j;
2119 
2120     /*
2121         Determine the number of non-zeros in the diagonal and off-diagonal
2122         portions of the matrix in order to do correct preallocation
2123     */
2124 
2125     /* first get start and end of "diagonal" columns */
2126     if (csize == PETSC_DECIDE) {
2127       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2128       if (mglobal == n*bs) { /* square matrix */
2129         nlocal = m;
2130       } else {
2131         nlocal = n/size + ((n % size) > rank);
2132       }
2133     } else {
2134       nlocal = csize/bs;
2135     }
2136     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2137     rstart = rend - nlocal;
2138     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2139 
2140     /* next, compute all the lengths */
2141     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2142     for (i=0; i<m; i++) {
2143       jend = ii[i+1] - ii[i];
2144       olen = 0;
2145       dlen = 0;
2146       for (j=0; j<jend; j++) {
2147         if (*jj < rstart || *jj >= rend) olen++;
2148         else dlen++;
2149         jj++;
2150       }
2151       olens[i] = olen;
2152       dlens[i] = dlen;
2153     }
2154     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2155     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2156     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2157     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2158     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2159   } else {
2160     PetscInt ml,nl;
2161 
2162     M    = *newmat;
2163     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2164     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2165     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2166     /*
2167          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2168        rather than the slower MatSetValues().
2169     */
2170     M->was_assembled = PETSC_TRUE;
2171     M->assembled     = PETSC_FALSE;
2172   }
2173   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2174   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2175   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2176   ii   = aij->i;
2177   jj   = aij->j;
2178   aa   = aij->a;
2179   for (i=0; i<m; i++) {
2180     row   = rstart/bs + i;
2181     nz    = ii[i+1] - ii[i];
2182     cwork = jj;     jj += nz;
2183     vwork = aa;     aa += nz*bs*bs;
2184     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2185   }
2186 
2187   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2188   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2189   *newmat = M;
2190 
2191   /* save submatrix used in processor for next request */
2192   if (call ==  MAT_INITIAL_MATRIX) {
2193     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2194     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2195   }
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatPermute_MPIBAIJ"
2201 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2202 {
2203   MPI_Comm       comm,pcomm;
2204   PetscInt       clocal_size,nrows;
2205   const PetscInt *rows;
2206   PetscMPIInt    size;
2207   IS             crowp,lcolp;
2208   PetscErrorCode ierr;
2209 
2210   PetscFunctionBegin;
2211   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2212   /* make a collective version of 'rowp' */
2213   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2214   if (pcomm==comm) {
2215     crowp = rowp;
2216   } else {
2217     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2218     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2219     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2220     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2221   }
2222   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2223   /* make a local version of 'colp' */
2224   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2225   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2226   if (size==1) {
2227     lcolp = colp;
2228   } else {
2229     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2230   }
2231   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2232   /* now we just get the submatrix */
2233   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2234   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2235   /* clean up */
2236   if (pcomm!=comm) {
2237     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2238   }
2239   if (size>1) {
2240     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2241   }
2242   PetscFunctionReturn(0);
2243 }
2244 
2245 #undef __FUNCT__
2246 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2247 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2248 {
2249   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2250   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2251 
2252   PetscFunctionBegin;
2253   if (nghosts) *nghosts = B->nbs;
2254   if (ghosts) *ghosts = baij->garray;
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2260 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2261 {
2262   Mat            B;
2263   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2264   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2265   Mat_SeqAIJ     *b;
2266   PetscErrorCode ierr;
2267   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2268   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2269   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2270 
2271   PetscFunctionBegin;
2272   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2273   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2274 
2275   /* ----------------------------------------------------------------
2276      Tell every processor the number of nonzeros per row
2277   */
2278   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2279   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2280     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2281   }
2282   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2283   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2284   displs    = recvcounts + size;
2285   for (i=0; i<size; i++) {
2286     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2287     displs[i]     = A->rmap->range[i]/bs;
2288   }
2289 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2290   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2291 #else
2292   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2293 #endif
2294   /* ---------------------------------------------------------------
2295      Create the sequential matrix of the same type as the local block diagonal
2296   */
2297   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2298   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2299   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2300   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2301   b    = (Mat_SeqAIJ*)B->data;
2302 
2303   /*--------------------------------------------------------------------
2304     Copy my part of matrix column indices over
2305   */
2306   sendcount  = ad->nz + bd->nz;
2307   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2308   a_jsendbuf = ad->j;
2309   b_jsendbuf = bd->j;
2310   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2311   cnt        = 0;
2312   for (i=0; i<n; i++) {
2313 
2314     /* put in lower diagonal portion */
2315     m = bd->i[i+1] - bd->i[i];
2316     while (m > 0) {
2317       /* is it above diagonal (in bd (compressed) numbering) */
2318       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2319       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2320       m--;
2321     }
2322 
2323     /* put in diagonal portion */
2324     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2325       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2326     }
2327 
2328     /* put in upper diagonal portion */
2329     while (m-- > 0) {
2330       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2331     }
2332   }
2333   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2334 
2335   /*--------------------------------------------------------------------
2336     Gather all column indices to all processors
2337   */
2338   for (i=0; i<size; i++) {
2339     recvcounts[i] = 0;
2340     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2341       recvcounts[i] += lens[j];
2342     }
2343   }
2344   displs[0] = 0;
2345   for (i=1; i<size; i++) {
2346     displs[i] = displs[i-1] + recvcounts[i-1];
2347   }
2348 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2349   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2350 #else
2351   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2352 #endif
2353   /*--------------------------------------------------------------------
    Assemble the matrix into usable form (note numerical values not yet set)
2355   */
2356   /* set the b->ilen (length of each row) values */
2357   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2358   /* set the b->i indices */
2359   b->i[0] = 0;
2360   for (i=1; i<=A->rmap->N/bs; i++) {
2361     b->i[i] = b->i[i-1] + lens[i-1];
2362   }
2363   ierr = PetscFree(lens);CHKERRQ(ierr);
2364   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2365   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2366   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2367 
2368   if (A->symmetric) {
2369     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2370   } else if (A->hermitian) {
2371     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2372   } else if (A->structurally_symmetric) {
2373     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2374   }
2375   *newmat = B;
2376   PetscFunctionReturn(0);
2377 }
2378 
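/*
   A small worked example of the b->i construction above (illustrative only):
   row lengths lens = {2,0,3} give the usual CSR prefix sum b->i = {0,2,2,5},
   so row r occupies b->j[b->i[r]] through b->j[b->i[r+1]-1].
*/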
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatSOR_MPIBAIJ"
2381 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2382 {
2383   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2384   PetscErrorCode ierr;
2385   Vec            bb1 = 0;
2386 
2387   PetscFunctionBegin;
2388   if (flag == SOR_APPLY_UPPER) {
2389     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2390     PetscFunctionReturn(0);
2391   }
2392 
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { /* more than one iteration, or the zero-initial-guess bit is not set */
2394     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2395   }
2396 
2397   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2398     if (flag & SOR_ZERO_INITIAL_GUESS) {
2399       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2400       its--;
2401     }
2402 
2403     while (its--) {
2404       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2405       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2406 
2407       /* update rhs: bb1 = bb - B*x */
2408       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2409       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2410 
2411       /* local sweep */
2412       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2413     }
2414   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2415     if (flag & SOR_ZERO_INITIAL_GUESS) {
2416       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2417       its--;
2418     }
2419     while (its--) {
2420       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2421       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2422 
2423       /* update rhs: bb1 = bb - B*x */
2424       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2425       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2426 
2427       /* local sweep */
2428       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2429     }
2430   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2431     if (flag & SOR_ZERO_INITIAL_GUESS) {
2432       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2433       its--;
2434     }
2435     while (its--) {
2436       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2437       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2438 
2439       /* update rhs: bb1 = bb - B*x */
2440       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2441       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2442 
2443       /* local sweep */
2444       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2445     }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2447 
2448   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
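/*
   The local SOR variants above split the parallel matrix as A = D + B, with
   D the on-process (diagonal) block and B the off-process block, and iterate

       x_{k+1} = SOR_D(bb - B*x_k)

   so each outer iteration scatters the current ghost values, forms the
   modified right-hand side bb1 = bb - B*x, and then runs lits local sweeps
   on D only.
*/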
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2454 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2455 {
2456   PetscErrorCode ierr;
2457   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2458   PetscInt       N,i,*garray = aij->garray;
2459   PetscInt       ib,jb,bs = A->rmap->bs;
2460   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2461   MatScalar      *a_val = a_aij->a;
2462   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2463   MatScalar      *b_val = b_aij->a;
2464   PetscReal      *work;
2465 
2466   PetscFunctionBegin;
2467   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2468   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2469   if (type == NORM_2) {
2470     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2471       for (jb=0; jb<bs; jb++) {
2472         for (ib=0; ib<bs; ib++) {
2473           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2474           a_val++;
2475         }
2476       }
2477     }
2478     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2479       for (jb=0; jb<bs; jb++) {
2480         for (ib=0; ib<bs; ib++) {
2481           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2482           b_val++;
2483         }
2484       }
2485     }
2486   } else if (type == NORM_1) {
2487     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2488       for (jb=0; jb<bs; jb++) {
2489         for (ib=0; ib<bs; ib++) {
2490           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2491           a_val++;
2492         }
2493       }
2494     }
2495     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2496       for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
2498           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2499           b_val++;
2500         }
2501       }
2502     }
2503   } else if (type == NORM_INFINITY) {
2504     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2505       for (jb=0; jb<bs; jb++) {
2506         for (ib=0; ib<bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2508           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2509           a_val++;
2510         }
2511       }
2512     }
2513     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2514       for (jb=0; jb<bs; jb++) {
2515         for (ib=0; ib<bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
2517           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2518           b_val++;
2519         }
2520       }
2521     }
2522   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2523   if (type == NORM_INFINITY) {
2524     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2525   } else {
2526     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2527   }
2528   ierr = PetscFree(work);CHKERRQ(ierr);
2529   if (type == NORM_2) {
2530     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2531   }
2532   PetscFunctionReturn(0);
2533 }
2534 
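/*
   A caller-side sketch for the routine above (illustrative only): norms must
   have room for one entry per global column, on every process.

     PetscReal *norms;
     PetscInt  N;
     MatGetSize(A,NULL,&N);
     PetscMalloc1(N,&norms);
     MatGetColumnNorms(A,NORM_2,norms);
     PetscFree(norms);
*/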
2535 #undef __FUNCT__
2536 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2537 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2538 {
2539   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2540   PetscErrorCode ierr;
2541 
2542   PetscFunctionBegin;
2543   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 
2548 /* -------------------------------------------------------------------*/
2549 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2550                                        MatGetRow_MPIBAIJ,
2551                                        MatRestoreRow_MPIBAIJ,
2552                                        MatMult_MPIBAIJ,
2553                                 /* 4*/ MatMultAdd_MPIBAIJ,
2554                                        MatMultTranspose_MPIBAIJ,
2555                                        MatMultTransposeAdd_MPIBAIJ,
2556                                        0,
2557                                        0,
2558                                        0,
2559                                 /*10*/ 0,
2560                                        0,
2561                                        0,
2562                                        MatSOR_MPIBAIJ,
2563                                        MatTranspose_MPIBAIJ,
2564                                 /*15*/ MatGetInfo_MPIBAIJ,
2565                                        MatEqual_MPIBAIJ,
2566                                        MatGetDiagonal_MPIBAIJ,
2567                                        MatDiagonalScale_MPIBAIJ,
2568                                        MatNorm_MPIBAIJ,
2569                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2570                                        MatAssemblyEnd_MPIBAIJ,
2571                                        MatSetOption_MPIBAIJ,
2572                                        MatZeroEntries_MPIBAIJ,
2573                                 /*24*/ MatZeroRows_MPIBAIJ,
2574                                        0,
2575                                        0,
2576                                        0,
2577                                        0,
2578                                 /*29*/ MatSetUp_MPIBAIJ,
2579                                        0,
2580                                        0,
2581                                        0,
2582                                        0,
2583                                 /*34*/ MatDuplicate_MPIBAIJ,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                 /*39*/ MatAXPY_MPIBAIJ,
2589                                        MatGetSubMatrices_MPIBAIJ,
2590                                        MatIncreaseOverlap_MPIBAIJ,
2591                                        MatGetValues_MPIBAIJ,
2592                                        MatCopy_MPIBAIJ,
2593                                 /*44*/ 0,
2594                                        MatScale_MPIBAIJ,
2595                                        0,
2596                                        0,
2597                                        MatZeroRowsColumns_MPIBAIJ,
2598                                 /*49*/ 0,
2599                                        0,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2604                                        0,
2605                                        MatSetUnfactored_MPIBAIJ,
2606                                        MatPermute_MPIBAIJ,
2607                                        MatSetValuesBlocked_MPIBAIJ,
2608                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2609                                        MatDestroy_MPIBAIJ,
2610                                        MatView_MPIBAIJ,
2611                                        0,
2612                                        0,
2613                                 /*64*/ 0,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                 /*74*/ 0,
2624                                        MatFDColoringApply_BAIJ,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                 /*79*/ 0,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                        MatLoad_MPIBAIJ,
2633                                 /*84*/ 0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                 /*89*/ 0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                 /*94*/ 0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                 /*99*/ 0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                 /*104*/0,
2654                                        MatRealPart_MPIBAIJ,
2655                                        MatImaginaryPart_MPIBAIJ,
2656                                        0,
2657                                        0,
2658                                 /*109*/0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2664                                        0,
2665                                        MatGetGhosts_MPIBAIJ,
2666                                        0,
2667                                        0,
2668                                 /*119*/0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        MatGetMultiProcBlock_MPIBAIJ,
2673                                 /*124*/0,
2674                                        MatGetColumnNorms_MPIBAIJ,
2675                                        MatInvertBlockDiagonal_MPIBAIJ,
2676                                        0,
2677                                        0,
2678                                /*129*/ 0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                /*134*/ 0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                /*139*/ 0,
2689                                        0,
2690                                        0,
2691                                        MatFDColoringSetUp_MPIXAIJ
2692 };
2693 
2694 #undef __FUNCT__
2695 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2696 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2697 {
2698   PetscFunctionBegin;
2699   *a = ((Mat_MPIBAIJ*)A->data)->A;
2700   PetscFunctionReturn(0);
2701 }
2702 
2703 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2704 
2705 #undef __FUNCT__
2706 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2707 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2708 {
2709   PetscInt       m,rstart,cstart,cend;
2710   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2711   const PetscInt *JJ    =0;
2712   PetscScalar    *values=0;
2713   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2714   PetscErrorCode ierr;
2715 
2716   PetscFunctionBegin;
2717   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2718   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2719   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2720   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2721   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2722   m      = B->rmap->n/bs;
2723   rstart = B->rmap->rstart/bs;
2724   cstart = B->cmap->rstart/bs;
2725   cend   = B->cmap->rend/bs;
2726 
2727   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2728   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2729   for (i=0; i<m; i++) {
2730     nz = ii[i+1] - ii[i];
2731     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2732     nz_max = PetscMax(nz_max,nz);
2733     JJ     = jj + ii[i];
2734     for (j=0; j<nz; j++) {
2735       if (*JJ >= cstart) break;
2736       JJ++;
2737     }
2738     d = 0;
2739     for (; j<nz; j++) {
2740       if (*JJ++ >= cend) break;
2741       d++;
2742     }
2743     d_nnz[i] = d;
2744     o_nnz[i] = nz - d;
2745   }
2746   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2747   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2748 
2749   values = (PetscScalar*)V;
2750   if (!values) {
2751     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2752     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2753   }
2754   for (i=0; i<m; i++) {
2755     PetscInt          row    = i + rstart;
2756     PetscInt          ncols  = ii[i+1] - ii[i];
2757     const PetscInt    *icols = jj + ii[i];
2758     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2759       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2760       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2761     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2762       PetscInt j;
2763       for (j=0; j<ncols; j++) {
2764         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2765         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2766       }
2767     }
2768   }
2769 
2770   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2771   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2772   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2773   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 #undef __FUNCT__
2778 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2779 /*@C
2780    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2781    (block compressed row).
2782 
2783    Collective on MPI_Comm
2784 
2785    Input Parameters:
2786 +  A - the matrix
2787 .  bs - the block size
2788 .  i - the indices into j for the start of each local row (starts with zero)
2789 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2790 -  v - optional values in the matrix
2791 
2792    Level: developer
2793 
2794    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2795    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2796    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2797    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2798    block column and the second index is over columns within a block.
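
   Example usage (an illustrative sketch, not taken from the PETSc examples; the sizes and values are
   assumptions chosen only to show the calling sequence, and A is assumed to be an MPIBAIJ matrix whose
   local size matches one block row of size bs):

.vb
   PetscInt    bs = 2, ii[2] = {0,2}, jj[2] = {0,1};  /* one local block row with blocks in block columns 0 and 1 */
   PetscScalar v[8] = {1,2,3,4,5,6,7,8};              /* bs*bs values per block, MAT_ROW_ORIENTED layout */
   ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,v);CHKERRQ(ierr);
.ve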
2799 
2800 .keywords: matrix, aij, compressed row, sparse, parallel
2801 
2802 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2803 @*/
2804 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2805 {
2806   PetscErrorCode ierr;
2807 
2808   PetscFunctionBegin;
2809   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2810   PetscValidType(B,1);
2811   PetscValidLogicalCollectiveInt(B,bs,2);
2812   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2813   PetscFunctionReturn(0);
2814 }
2815 
2816 #undef __FUNCT__
2817 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2818 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2819 {
2820   Mat_MPIBAIJ    *b;
2821   PetscErrorCode ierr;
2822   PetscInt       i;
2823 
2824   PetscFunctionBegin;
2825   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2826   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2827   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2828   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2829 
2830   if (d_nnz) {
2831     for (i=0; i<B->rmap->n/bs; i++) {
2832       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2833     }
2834   }
2835   if (o_nnz) {
2836     for (i=0; i<B->rmap->n/bs; i++) {
2837       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2838     }
2839   }
2840 
2841   b      = (Mat_MPIBAIJ*)B->data;
2842   b->bs2 = bs*bs;
2843   b->mbs = B->rmap->n/bs;
2844   b->nbs = B->cmap->n/bs;
2845   b->Mbs = B->rmap->N/bs;
2846   b->Nbs = B->cmap->N/bs;
2847 
2848   for (i=0; i<=b->size; i++) {
2849     b->rangebs[i] = B->rmap->range[i]/bs;
2850   }
2851   b->rstartbs = B->rmap->rstart/bs;
2852   b->rendbs   = B->rmap->rend/bs;
2853   b->cstartbs = B->cmap->rstart/bs;
2854   b->cendbs   = B->cmap->rend/bs;
2855 
2856   if (!B->preallocated) {
2857     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2858     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2859     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2860     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2861     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2862     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2863     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2864     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2865     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2866   }
2867 
2868   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2869   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2870   B->preallocated = PETSC_TRUE;
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2875 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2876 
2877 #undef __FUNCT__
2878 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2879 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2880 {
2881   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2882   PetscErrorCode ierr;
2883   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2884   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2885   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2886 
2887   PetscFunctionBegin;
2888   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2889   ii[0] = 0;
2890   for (i=0; i<M; i++) {
2891     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2892     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2893     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2894     /* remove one from the count if this row of the diagonal block has a diagonal entry */
2895     for (j=id[i]; j<id[i+1]; j++) {
2896       if (jd[j] == i) {ii[i+1]--;break;}
2897     }
2898   }
2899   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2900   cnt  = 0;
2901   for (i=0; i<M; i++) {
2902     for (j=io[i]; j<io[i+1]; j++) {
2903       if (garray[jo[j]] > rstart) break;
2904       jj[cnt++] = garray[jo[j]];
2905     }
2906     for (k=id[i]; k<id[i+1]; k++) {
2907       if (jd[k] != i) {
2908         jj[cnt++] = rstart + jd[k];
2909       }
2910     }
2911     for (; j<io[i+1]; j++) {
2912       jj[cnt++] = garray[jo[j]];
2913     }
2914   }
2915   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2916   PetscFunctionReturn(0);
2917 }
2918 
2919 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2920 
2921 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2922 
2923 #undef __FUNCT__
2924 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2925 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2926 {
2927   PetscErrorCode ierr;
2928   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2929   Mat            B;
2930   Mat_MPIAIJ     *b;
2931 
2932   PetscFunctionBegin;
2933   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2934 
2935   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2936   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2937   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2938   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2939   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2940   b    = (Mat_MPIAIJ*) B->data;
2941 
2942   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2943   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2944   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2945   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2946   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2947   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2948   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2949   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2950   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2951   if (reuse == MAT_REUSE_MATRIX) {
2952     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2953   } else {
2954    *newmat = B;
2955   }
2956   PetscFunctionReturn(0);
2957 }
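
/*
   This converter is normally reached through the generic interface; a minimal illustrative sketch:

     Mat Aaij;
     ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aaij);CHKERRQ(ierr);
*/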
2958 
2959 #if defined(PETSC_HAVE_MUMPS)
2960 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2961 #endif
2962 
2963 /*MC
2964    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2965 
2966    Options Database Keys:
2967 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2968 . -mat_block_size <bs> - set the blocksize used to store the matrix
2969 - -mat_use_hash_table <fact> - use a hash table in matrix assembly to save memory, with <fact> as the hash table size factor
2970 
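  Example usage (illustrative; myprog is a hypothetical program that calls MatSetFromOptions() on its matrix):
.vb
   mpiexec -n 4 ./myprog -mat_type mpibaij -mat_block_size 2
.ve
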
2971   Level: beginner
2972 
2973 .seealso: MatCreateBAIJ()
2974 M*/
2975 
2976 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2977 
2978 #undef __FUNCT__
2979 #define __FUNCT__ "MatCreate_MPIBAIJ"
2980 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2981 {
2982   Mat_MPIBAIJ    *b;
2983   PetscErrorCode ierr;
2984   PetscBool      flg;
2985 
2986   PetscFunctionBegin;
2987   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2988   B->data = (void*)b;
2989 
2990   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2991   B->assembled = PETSC_FALSE;
2992 
2993   B->insertmode = NOT_SET_VALUES;
2994   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2995   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2996 
2997   /* build local table of row and column ownerships */
2998   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
2999 
3000   /* build cache for off array entries formed */
3001   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3002 
3003   b->donotstash  = PETSC_FALSE;
3004   b->colmap      = NULL;
3005   b->garray      = NULL;
3006   b->roworiented = PETSC_TRUE;
3007 
3008   /* stuff used in block assembly */
3009   b->barray = 0;
3010 
3011   /* stuff used for matrix vector multiply */
3012   b->lvec  = 0;
3013   b->Mvctx = 0;
3014 
3015   /* stuff for MatGetRow() */
3016   b->rowindices   = 0;
3017   b->rowvalues    = 0;
3018   b->getrowactive = PETSC_FALSE;
3019 
3020   /* hash table stuff */
3021   b->ht           = 0;
3022   b->hd           = 0;
3023   b->ht_size      = 0;
3024   b->ht_flag      = PETSC_FALSE;
3025   b->ht_fact      = 0;
3026   b->ht_total_ct  = 0;
3027   b->ht_insert_ct = 0;
3028 
3029   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3030   b->ijonly = PETSC_FALSE;
3031 
3032   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3033   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3034   if (flg) {
3035     PetscReal fact = 1.39;
3036     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3037     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3038     if (fact <= 1.0) fact = 1.39;
3039     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3040     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3041   }
3042   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3043 
3044 #if defined(PETSC_HAVE_MUMPS)
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3046 #endif
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3058   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 /*MC
3063    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3064 
3065    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3066    and MATMPIBAIJ otherwise.
3067 
3068    Options Database Keys:
3069 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3070 
3071   Level: beginner
3072 
3073 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3074 M*/
3075 
3076 #undef __FUNCT__
3077 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3078 /*@C
3079    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3080    (block compressed row).  For good matrix assembly performance
3081    the user should preallocate the matrix storage by setting the parameters
3082    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3083    performance can be increased by more than a factor of 50.
3084 
3085    Collective on Mat
3086 
3087    Input Parameters:
3088 +  A - the matrix
3089 .  bs   - size of block
3090 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3091            submatrix  (same for all local rows)
3092 .  d_nnz - array containing the number of block nonzeros in the various block rows
3093            of the diagonal portion of the local submatrix (possibly different for each block
3094            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3095            set it even if it is zero.
3096 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3097            submatrix (same for all local rows).
3098 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3099            off-diagonal portion of the local submatrix (possibly different for
3100            each block row) or NULL.
3101 
3102    If the *_nnz parameter is given then the *_nz parameter is ignored
3103 
3104    Options Database Keys:
3105 +   -mat_block_size - size of the blocks to use
3106 -   -mat_use_hash_table <fact> - use a hash table in matrix assembly to save memory, with <fact> as the hash table size factor
3107 
3108    Notes:
3109    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3110    then it must be used on all processors that share the object for that argument.
3111 
3112    Storage Information:
3113    For a square global matrix we define each processor's diagonal portion
3114    to be its local rows and the corresponding columns (a square submatrix);
3115    each processor's off-diagonal portion encompasses the remainder of the
3116    local matrix (a rectangular submatrix).
3117 
3118    The user can specify preallocated storage for the diagonal part of
3119    the local submatrix with either d_nz or d_nnz (not both).  Set
3120    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3121    memory allocation.  Likewise, specify preallocated storage for the
3122    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3123 
3124    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3125    the figure below we depict these three local rows and all columns (0-11).
3126 
3127 .vb
3128            0 1 2 3 4 5 6 7 8 9 10 11
3129           --------------------------
3130    row 3  |o o o d d d o o o o  o  o
3131    row 4  |o o o d d d o o o o  o  o
3132    row 5  |o o o d d d o o o o  o  o
3133           --------------------------
3134 .ve
3135 
3136    Thus, any entries in the d locations are stored in the d (diagonal)
3137    submatrix, and any entries in the o locations are stored in the
3138    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3139    stored simply in the MATSEQBAIJ format for compressed row storage.
3140 
3141    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3142    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3143    In general, for PDE problems in which most nonzeros are near the diagonal,
3144    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3145    or you will get TERRIBLE performance; see the users' manual chapter on
3146    matrices.
3147 
3148    You can call MatGetInfo() to get information on how effective the preallocation was;
3149    for example, check the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3150    You can also run with the option -info and look for messages with the string
3151    malloc in them to see if additional memory allocation was needed.
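
   Example usage (an illustrative sketch; the block size and per-row block counts are assumptions made
   for this example, and the local row count m is assumed to be a multiple of the block size):

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,3,NULL,1,NULL);CHKERRQ(ierr);  /* bs=2: 3 diagonal and 1 off-diagonal blocks per block row */
.ve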
3152 
3153    Level: intermediate
3154 
3155 .keywords: matrix, block, aij, compressed row, sparse, parallel
3156 
3157 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3158 @*/
3159 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3160 {
3161   PetscErrorCode ierr;
3162 
3163   PetscFunctionBegin;
3164   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3165   PetscValidType(B,1);
3166   PetscValidLogicalCollectiveInt(B,bs,2);
3167   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3168   PetscFunctionReturn(0);
3169 }
3170 
3171 #undef __FUNCT__
3172 #define __FUNCT__ "MatCreateBAIJ"
3173 /*@C
3174    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3175    (block compressed row).  For good matrix assembly performance
3176    the user should preallocate the matrix storage by setting the parameters
3177    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3178    performance can be increased by more than a factor of 50.
3179 
3180    Collective on MPI_Comm
3181 
3182    Input Parameters:
3183 +  comm - MPI communicator
3184 .  bs   - size of block
3185 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3186            This value should be the same as the local size used in creating the
3187            y vector for the matrix-vector product y = Ax.
3188 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3189            This value should be the same as the local size used in creating the
3190            x vector for the matrix-vector product y = Ax.
3191 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3192 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3193 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3194            submatrix  (same for all local rows)
3195 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3196            of the diagonal portion of the local submatrix (possibly different for each block
3197            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3198            and set it even if it is zero.
3199 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3200            submatrix (same for all local rows).
3201 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3202            off-diagonal portion of the local submatrix (possibly different for
3203            each block row) or NULL.
3204 
3205    Output Parameter:
3206 .  A - the matrix
3207 
3208    Options Database Keys:
3209 +   -mat_block_size - size of the blocks to use
3210 -   -mat_use_hash_table <fact> - use a hash table in matrix assembly to save memory, with <fact> as the hash table size factor
3211 
3212    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3213    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3214    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3215 
3216    Notes:
3217    If the *_nnz parameter is given then the *_nz parameter is ignored
3218 
3219    A nonzero block is any block that has 1 or more nonzeros in it
3220 
3221    The user MUST specify either the local or global matrix dimensions
3222    (possibly both).
3223 
3224    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3225    then it must be used on all processors that share the object for that argument.
3226 
3227    Storage Information:
3228    For a square global matrix we define each processor's diagonal portion
3229    to be its local rows and the corresponding columns (a square submatrix);
3230    each processor's off-diagonal portion encompasses the remainder of the
3231    local matrix (a rectangular submatrix).
3232 
3233    The user can specify preallocated storage for the diagonal part of
3234    the local submatrix with either d_nz or d_nnz (not both).  Set
3235    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3236    memory allocation.  Likewise, specify preallocated storage for the
3237    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3238 
3239    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3240    the figure below we depict these three local rows and all columns (0-11).
3241 
3242 .vb
3243            0 1 2 3 4 5 6 7 8 9 10 11
3244           --------------------------
3245    row 3  |o o o d d d o o o o  o  o
3246    row 4  |o o o d d d o o o o  o  o
3247    row 5  |o o o d d d o o o o  o  o
3248           --------------------------
3249 .ve
3250 
3251    Thus, any entries in the d locations are stored in the d (diagonal)
3252    submatrix, and any entries in the o locations are stored in the
3253    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3254    stored simply in the MATSEQBAIJ format for compressed row storage.
3255 
3256    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3257    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3258    In general, for PDE problems in which most nonzeros are near the diagonal,
3259    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3260    or you will get TERRIBLE performance; see the users' manual chapter on
3261    matrices.
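
   Example usage (an illustrative sketch; the sizes and block counts are assumptions made for this
   example, with mbs block rows of size 2 per process):

.vb
   Mat A;
   ierr = MatCreateBAIJ(comm,2,2*mbs,2*mbs,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve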
3262 
3263    Level: intermediate
3264 
3265 .keywords: matrix, block, aij, compressed row, sparse, parallel
3266 
3267 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3268 @*/
3269 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3270 {
3271   PetscErrorCode ierr;
3272   PetscMPIInt    size;
3273 
3274   PetscFunctionBegin;
3275   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3276   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3277   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3278   if (size > 1) {
3279     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3280     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3281   } else {
3282     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3283     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3284   }
3285   PetscFunctionReturn(0);
3286 }
3287 
3288 #undef __FUNCT__
3289 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3290 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3291 {
3292   Mat            mat;
3293   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3294   PetscErrorCode ierr;
3295   PetscInt       len=0;
3296 
3297   PetscFunctionBegin;
3298   *newmat = 0;
3299   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3300   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3301   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3302   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3303 
3304   mat->factortype   = matin->factortype;
3305   mat->preallocated = PETSC_TRUE;
3306   mat->assembled    = PETSC_TRUE;
3307   mat->insertmode   = NOT_SET_VALUES;
3308 
3309   a             = (Mat_MPIBAIJ*)mat->data;
3310   mat->rmap->bs = matin->rmap->bs;
3311   a->bs2        = oldmat->bs2;
3312   a->mbs        = oldmat->mbs;
3313   a->nbs        = oldmat->nbs;
3314   a->Mbs        = oldmat->Mbs;
3315   a->Nbs        = oldmat->Nbs;
3316 
3317   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3318   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3319 
3320   a->size         = oldmat->size;
3321   a->rank         = oldmat->rank;
3322   a->donotstash   = oldmat->donotstash;
3323   a->roworiented  = oldmat->roworiented;
3324   a->rowindices   = 0;
3325   a->rowvalues    = 0;
3326   a->getrowactive = PETSC_FALSE;
3327   a->barray       = 0;
3328   a->rstartbs     = oldmat->rstartbs;
3329   a->rendbs       = oldmat->rendbs;
3330   a->cstartbs     = oldmat->cstartbs;
3331   a->cendbs       = oldmat->cendbs;
3332 
3333   /* hash table stuff */
3334   a->ht           = 0;
3335   a->hd           = 0;
3336   a->ht_size      = 0;
3337   a->ht_flag      = oldmat->ht_flag;
3338   a->ht_fact      = oldmat->ht_fact;
3339   a->ht_total_ct  = 0;
3340   a->ht_insert_ct = 0;
3341 
3342   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3343   if (oldmat->colmap) {
3344 #if defined(PETSC_USE_CTABLE)
3345     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3346 #else
3347     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3348     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3349     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3350 #endif
3351   } else a->colmap = 0;
3352 
3353   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3354     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3355     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3356     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3357   } else a->garray = 0;
3358 
3359   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3360   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3361   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3362   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3363   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3364 
3365   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3366   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3367   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3368   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3369   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3370   *newmat = mat;
3371   PetscFunctionReturn(0);
3372 }
3373 
3374 #undef __FUNCT__
3375 #define __FUNCT__ "MatLoad_MPIBAIJ"
3376 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3377 {
3378   PetscErrorCode ierr;
3379   int            fd;
3380   PetscInt       i,nz,j,rstart,rend;
3381   PetscScalar    *vals,*buf;
3382   MPI_Comm       comm;
3383   MPI_Status     status;
3384   PetscMPIInt    rank,size,maxnz;
3385   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3386   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3387   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3388   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3389   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3390   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3391 
3392   PetscFunctionBegin;
3393   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3394   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3395   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3396   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3397 
3398   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3399   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3400   if (!rank) {
3401     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3402     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3403     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3404   }
3405 
3406   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3407 
3408   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3409   M    = header[1]; N = header[2];
3410 
3411   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3412   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3413   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3414 
3415   /* If global sizes are set, check if they are consistent with that given in the file */
3416   if (sizesset) {
3417     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3418   }
3419   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3420   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3421 
3422   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3423 
3424   /*
3425      This code adds extra rows to make sure the number of rows is
3426      divisible by the blocksize
3427   */
3428   Mbs        = M/bs;
3429   extra_rows = bs - M + bs*Mbs;
3430   if (extra_rows == bs) extra_rows = 0;
3431   else                  Mbs++;
3432   if (extra_rows && !rank) {
3433     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3434   }
3435 
3436   /* determine ownership of all rows */
3437   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3438     mbs = Mbs/size + ((Mbs % size) > rank);
3439     m   = mbs*bs;
3440   } else { /* User set */
3441     m   = newmat->rmap->n;
3442     mbs = m/bs;
3443   }
3444   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3445   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3446 
3447   /* process 0 needs enough room for process with most rows */
3448   if (!rank) {
3449     mmax = rowners[1];
3450     for (i=2; i<=size; i++) {
3451       mmax = PetscMax(mmax,rowners[i]);
3452     }
3453     mmax*=bs;
3454   } else mmax = -1;             /* unused, but compiler warns anyway */
3455 
3456   rowners[0] = 0;
3457   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3458   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3459   rstart = rowners[rank];
3460   rend   = rowners[rank+1];
3461 
3462   /* distribute row lengths to all processors */
3463   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3464   if (!rank) {
3465     mend = m;
3466     if (size == 1) mend = mend - extra_rows;
3467     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3468     for (j=mend; j<m; j++) locrowlens[j] = 1;
3469     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3470     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3471     for (j=0; j<m; j++) {
3472       procsnz[0] += locrowlens[j];
3473     }
3474     for (i=1; i<size; i++) {
3475       mend = browners[i+1] - browners[i];
3476       if (i == size-1) mend = mend - extra_rows;
3477       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3478       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3479       /* calculate the number of nonzeros on each processor */
3480       for (j=0; j<browners[i+1]-browners[i]; j++) {
3481         procsnz[i] += rowlengths[j];
3482       }
3483       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3484     }
3485     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3486   } else {
3487     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3488   }
3489 
3490   if (!rank) {
3491     /* determine max buffer needed and allocate it */
3492     maxnz = procsnz[0];
3493     for (i=1; i<size; i++) {
3494       maxnz = PetscMax(maxnz,procsnz[i]);
3495     }
3496     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3497 
3498     /* read in my part of the matrix column indices  */
3499     nz     = procsnz[0];
3500     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3501     mycols = ibuf;
3502     if (size == 1) nz -= extra_rows;
3503     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3504     if (size == 1) {
3505       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3506     }
3507 
3508     /* read in everyone else's rows (except the last process's) and ship them off */
3509     for (i=1; i<size-1; i++) {
3510       nz   = procsnz[i];
3511       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3512       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3513     }
3514     /* read in the stuff for the last proc */
3515     if (size != 1) {
3516       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3517       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3518       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3519       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3520     }
3521     ierr = PetscFree(cols);CHKERRQ(ierr);
3522   } else {
3523     /* determine buffer space needed for message */
3524     nz = 0;
3525     for (i=0; i<m; i++) {
3526       nz += locrowlens[i];
3527     }
3528     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3529     mycols = ibuf;
3530     /* receive message of column indices */
3531     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3532     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3533     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3534   }
3535 
3536   /* loop over local rows, determining number of off diagonal entries */
3537   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3538   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3539   rowcount = 0; nzcount = 0;
3540   for (i=0; i<mbs; i++) {
3541     dcount  = 0;
3542     odcount = 0;
3543     for (j=0; j<bs; j++) {
3544       kmax = locrowlens[rowcount];
3545       for (k=0; k<kmax; k++) {
3546         tmp = mycols[nzcount++]/bs;
3547         if (!mask[tmp]) {
3548           mask[tmp] = 1;
3549           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3550           else masked1[dcount++] = tmp;
3551         }
3552       }
3553       rowcount++;
3554     }
3555 
3556     dlens[i]  = dcount;
3557     odlens[i] = odcount;
3558 
3559     /* zero out the mask elements we set */
3560     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3561     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3562   }
3563 
3564 
3565   if (!sizesset) {
3566     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3567   }
3568   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3569 
3570   if (!rank) {
3571     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3572     /* read in my part of the matrix numerical values  */
3573     nz     = procsnz[0];
3574     vals   = buf;
3575     mycols = ibuf;
3576     if (size == 1) nz -= extra_rows;
3577     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3578     if (size == 1) {
3579       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3580     }
3581 
3582     /* insert into matrix */
3583     jj = rstart*bs;
3584     for (i=0; i<m; i++) {
3585       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3586       mycols += locrowlens[i];
3587       vals   += locrowlens[i];
3588       jj++;
3589     }
3590     /* read in other processors (except the last one) and ship out */
3591     for (i=1; i<size-1; i++) {
3592       nz   = procsnz[i];
3593       vals = buf;
3594       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3595       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3596     }
3597     /* the last proc */
3598     if (size != 1) {
3599       nz   = procsnz[i] - extra_rows;
3600       vals = buf;
3601       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3602       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3603       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3604     }
3605     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3606   } else {
3607     /* receive numeric values */
3608     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3609 
3610     /* receive message of values */
3611     vals   = buf;
3612     mycols = ibuf;
3613     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3614 
3615     /* insert into matrix */
3616     jj = rstart*bs;
3617     for (i=0; i<m; i++) {
3618       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3619       mycols += locrowlens[i];
3620       vals   += locrowlens[i];
3621       jj++;
3622     }
3623   }
3624   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3625   ierr = PetscFree(buf);CHKERRQ(ierr);
3626   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3627   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3628   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3629   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3630   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632   PetscFunctionReturn(0);
3633 }
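
/*
   This loader is normally reached through the generic interface; a minimal illustrative sketch
   (the file name is a placeholder):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);   |* run with -matload_block_size <bs> to set the block size *|
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/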
3634 
3635 #undef __FUNCT__
3636 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3637 /*@
3638    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
3639 
3640    Not Collective, each process can use a different factor
3641 
3642    Input Parameters:
3643 +  mat  - the matrix
3644 -  fact - the hash table size factor
3645 
3646    Level: advanced
3647 
3648   Notes:
3649    This can also be set by the command line option: -mat_use_hash_table <fact>
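
   Example usage (illustrative):
.vb
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);   /* 1.6 is an arbitrary factor > 1.0 */
.ve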
3650 
3651 .keywords: matrix, hashtable, factor, HT
3652 
3653 .seealso: MatSetOption()
3654 @*/
3655 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3656 {
3657   PetscErrorCode ierr;
3658 
3659   PetscFunctionBegin;
3660   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3661   PetscFunctionReturn(0);
3662 }
3663 
3664 #undef __FUNCT__
3665 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3666 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3667 {
3668   Mat_MPIBAIJ *baij;
3669 
3670   PetscFunctionBegin;
3671   baij          = (Mat_MPIBAIJ*)mat->data;
3672   baij->ht_fact = fact;
3673   PetscFunctionReturn(0);
3674 }
3675 
3676 #undef __FUNCT__
3677 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3678 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3679 {
3680   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3681 
3682   PetscFunctionBegin;
3683   *Ad     = a->A;
3684   *Ao     = a->B;
3685   *colmap = a->garray;
3686   PetscFunctionReturn(0);
3687 }
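
/*
   Usage sketch for MatMPIBAIJGetSeqBAIJ() (illustrative):

     Mat            Ad,Ao;
     const PetscInt *garray;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Ad and Ao are the diagonal and off-diagonal SeqBAIJ pieces of A, and garray[k] is the global
   block column of local block column k of Ao; these are internal references that the caller
   must not destroy.
*/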
3688 
3689 /*
3690     Special version for direct calls from Fortran (to eliminate two function call overheads)
3691 */
3692 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3693 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3694 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3695 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3696 #endif
3697 
3698 #undef __FUNCT__
3699 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3700 /*@C
3701   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3702 
3703   Collective on Mat
3704 
3705   Input Parameters:
3706 + mat - the matrix
3707 . min - number of input rows
3708 . im - input rows
3709 . nin - number of input columns
3710 . in - input columns
3711 . v - numerical values input
3712 - addvin - INSERT_VALUES or ADD_VALUES
3713 
3714   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3715 
3716   Level: advanced
3717 
3718 .seealso:   MatSetValuesBlocked()
3719 @*/
3720 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3721 {
3722   /* convert input arguments to C version */
3723   Mat        mat  = *matin;
3724   PetscInt   m    = *min, n = *nin;
3725   InsertMode addv = *addvin;
3726 
3727   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3728   const MatScalar *value;
3729   MatScalar       *barray     = baij->barray;
3730   PetscBool       roworiented = baij->roworiented;
3731   PetscErrorCode  ierr;
3732   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3733   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3734   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3735 
3736   PetscFunctionBegin;
3737   /* tasks normally handled by MatSetValuesBlocked() */
3738   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3739 #if defined(PETSC_USE_DEBUG)
3740   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3741   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3742 #endif
3743   if (mat->assembled) {
3744     mat->was_assembled = PETSC_TRUE;
3745     mat->assembled     = PETSC_FALSE;
3746   }
3747   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3748 
3749 
3750   if (!barray) {
3751     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3752     baij->barray = barray;
3753   }
3754 
3755   if (roworiented) stepval = (n-1)*bs;
3756   else stepval = (m-1)*bs;
3757 
3758   for (i=0; i<m; i++) {
3759     if (im[i] < 0) continue;
3760 #if defined(PETSC_USE_DEBUG)
3761     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3762 #endif
3763     if (im[i] >= rstart && im[i] < rend) {
3764       row = im[i] - rstart;
3765       for (j=0; j<n; j++) {
3766         /* If there is only one block in the row (row oriented) or column (column oriented), it can be used in place and a copy is not required */
3767         if ((roworiented) && (n == 1)) {
3768           barray = (MatScalar*)v + i*bs2;
3769         } else if ((!roworiented) && (m == 1)) {
3770           barray = (MatScalar*)v + j*bs2;
3771         } else { /* Here a copy is required */
3772           if (roworiented) {
3773             value = v + i*(stepval+bs)*bs + j*bs;
3774           } else {
3775             value = v + j*(stepval+bs)*bs + i*bs;
3776           }
3777           for (ii=0; ii<bs; ii++,value+=stepval) {
3778             for (jj=0; jj<bs; jj++) {
3779               *barray++ = *value++;
3780             }
3781           }
3782           barray -=bs2;
3783         }
3784 
3785         if (in[j] >= cstart && in[j] < cend) {
3786           col  = in[j] - cstart;
3787           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3788         } else if (in[j] < 0) continue;
3789 #if defined(PETSC_USE_DEBUG)
3790         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3791 #endif
3792         else {
3793           if (mat->was_assembled) {
3794             if (!baij->colmap) {
3795               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3796             }
3797 
3798 #if defined(PETSC_USE_DEBUG)
3799 #if defined(PETSC_USE_CTABLE)
3800             { PetscInt data;
3801               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3802               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3803             }
3804 #else
3805             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3806 #endif
3807 #endif
3808 #if defined(PETSC_USE_CTABLE)
3809             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3810             col  = (col - 1)/bs;
3811 #else
3812             col = (baij->colmap[in[j]] - 1)/bs;
3813 #endif
3814             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3815               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3816               col  =  in[j];
3817             }
3818           } else col = in[j];
3819           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3820         }
3821       }
3822     } else {
3823       if (!baij->donotstash) {
3824         if (roworiented) {
3825           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3826         } else {
3827           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3828         }
3829       }
3830     }
3831   }
3832 
3833   /* task normally handled by MatSetValuesBlocked() */
3834   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3835   PetscFunctionReturn(0);
3836 }
3837 
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3840 /*@
3841      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
3842          in standard CSR format.
3843 
3844    Collective on MPI_Comm
3845 
3846    Input Parameters:
3847 +  comm - MPI communicator
3848 .  bs - the block size, only a block size of 1 is supported
3849 .  m - number of local rows (Cannot be PETSC_DECIDE)
3850 .  n - This value should be the same as the local size used in creating the
3851        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3852        calculated if N is given) For square matrices n is almost always m.
3853 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3854 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3855 .   i - row indices
3856 .   j - column indices
3857 -   a - matrix values
3858 
3859    Output Parameter:
3860 .   mat - the matrix
3861 
3862    Level: intermediate
3863 
3864    Notes:
3865        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3866      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3867      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3868 
3869      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3870      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3871      block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3872      with column-major ordering within blocks.
3873 
3874        The i and j indices are 0 based, and the i indices are offsets into the local j array.
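
       An illustrative sketch for bs = 1 with one local row containing entries in global columns 0 and 5
     (the sizes and values are assumptions made for this example):

.vb
   PetscInt    i[2] = {0,2}, j[2] = {0,5};
   PetscScalar a[2] = {1.0,2.0};
   Mat         A;
   ierr = MatCreateMPIBAIJWithArrays(comm,1,1,PETSC_DECIDE,PETSC_DETERMINE,6,i,j,a,&A);CHKERRQ(ierr);
.ve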
3875 
3876 .keywords: matrix, aij, compressed row, sparse, parallel
3877 
3878 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3879           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3880 @*/
3881 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3882 {
3883   PetscErrorCode ierr;
3884 
3885   PetscFunctionBegin;
3886   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3887   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3888   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3889   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3890   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3891   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3892   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3893   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3894   PetscFunctionReturn(0);
3895 }
3896